Commit 9fae23fb authored by Cyrill Gorcunov, committed by Pavel Emelyanov

seize: Take --timeout option into account when freezing processes

When we're freezing processes we don't take any user setting into
account, but rather make 5 attempts with a constantly increasing delay.

Let's rather do the following:

 - take --timeout option into account (which is 5 seconds
   by default) and split it into 100 ms steps;

 - when freezing processes, check the freezer status every 100 ms.

At the same time, it looks like 5 seconds by default is too small
for highly loaded containers. Let's increase it to 10 seconds
by default.

[ skinsbursky@:
Frankly speaking, in this particular case increasing intervals are not nice.
This is not a network issue or something.
Usually freezing takes less than a second, but more than, say, 200ms.
Otherwise it takes quite a lot of time.
If the step size is growing all the time, in most cases criu will
waste hundreds of milliseconds between iterX (say, the third) and (iterX+1)
because of the growing step size.
A 100ms step looks solid enough: not too small to produce a lot of syscalls
and not too large to waste a lot of time.
With the previous scheme freezing was usually taking half a second more than
it should because of this growing step. ]

[ gorcunov@:
You won't believe it, but being able to specify --timeout 0 here allowed
me and Stas to catch a serious problem in the freezer code.

https://lkml.org/lkml/2016/8/3/317

Without this feature we would have had to patch criu instead. So, you know,
it would be great to keep it for catching more kernel bugs ;) ]
Reported-by: Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 5a43e55e
......@@ -38,7 +38,7 @@ struct cg_root_opt {
*/
#define DEFAULT_GHOST_LIMIT (1 << 20)
#define DEFAULT_TIMEOUT 5
#define DEFAULT_TIMEOUT 10
struct irmap;
......
......@@ -319,10 +319,30 @@ static int log_unfrozen_stacks(char *root)
static int freeze_processes(void)
{
int i, fd, exit_code = -1;
int fd, exit_code = -1;
char path[PATH_MAX];
const char *state = thawed;
static const unsigned long step_ms = 100;
unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
unsigned long i;
const struct timespec req = {
.tv_nsec = step_ms * 1000000,
.tv_sec = 0,
};
if (unlikely(!nr_attempts)) {
/*
* If timeout is turned off, lets
* wait for at least 10 seconds.
*/
nr_attempts = (10 * 1000000) / step_ms;
}
pr_debug("freezing processes: %lu attempst with %lu ms steps\n",
nr_attempts, step_ms);
snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
fd = open(path, O_RDWR);
if (fd < 0) {
......@@ -350,10 +370,7 @@ static int freeze_processes(void)
* freezer.state.
* Here is one extra attempt to check that everything are frozen.
*/
for (i = 0; i <= NR_ATTEMPTS; i++) {
struct timespec req = {};
u64 timeout;
for (i = 0; i <= nr_attempts; i++) {
if (seize_cgroup_tree(opts.freeze_cgroup, state) < 0)
goto err;
......@@ -376,13 +393,10 @@ static int freeze_processes(void)
if (alarm_timeouted())
goto err;
timeout = 100000000 * (i + 1); /* 100 msec */
req.tv_nsec = timeout % 1000000000;
req.tv_sec = timeout / 1000000000;
nanosleep(&req, NULL);
}
if (i > NR_ATTEMPTS) {
if (i > nr_attempts) {
pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
if (!pr_quelled(LOG_DEBUG))
......@@ -391,6 +405,7 @@ static int freeze_processes(void)
goto err;
}
pr_debug("freezing processes: %lu attempts done\n", i);
exit_code = 0;
err:
if (exit_code == 0 || freezer_thawed) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment