Commit 172f3e19 authored by Cyrill Gorcunov, committed by Pavel Emelyanov

freeze -- Wait for exiting tasks to complete

There is a small race between exiting tasks and the cgroup freezer:
it can report the pid of an exiting process, which we would consider
a zombie and refuse to checkpoint. Let's do a trick instead:
if we meet such a task, just wait a bit and repeat the freezing.

v3: Use EAGAIN instead of EPERM, since EPERM collides
    with the general -1 error code.

travis-ci: success for freeze -- Wait for exiting tasks to complete (rev3)
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 4f329dd4
...@@ -137,11 +137,16 @@ static int seize_cgroup_tree(char *root_path, const char *state) ...@@ -137,11 +137,16 @@ static int seize_cgroup_tree(char *root_path, const char *state)
snprintf(buf, sizeof(buf), "/proc/%d/exe", pid); snprintf(buf, sizeof(buf), "/proc/%d/exe", pid);
if (stat(buf, &st) == -1 && errno == ENOENT) if (stat(buf, &st) == -1 && errno == ENOENT)
continue; continue;
/*
/* fails when meets a zombie */ * fails when it meets a zombie or an exiting process:
* there is a small race in the kernel -- the process
* may start exiting and we are trying to freeze it
* before it completes the exit procedure. The caller simply
* should wait a bit and try freezing again.
*/
pr_err("zombie found while seizing\n"); pr_err("zombie found while seizing\n");
fclose(f); fclose(f);
return -1; return -EAGAIN;
} }
} }
fclose(f); fclose(f);
...@@ -154,6 +159,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) ...@@ -154,6 +159,7 @@ static int seize_cgroup_tree(char *root_path, const char *state)
while ((de = readdir(dir))) { while ((de = readdir(dir))) {
struct stat st; struct stat st;
int ret;
if (dir_dots(de)) if (dir_dots(de))
continue; continue;
...@@ -168,10 +174,10 @@ static int seize_cgroup_tree(char *root_path, const char *state) ...@@ -168,10 +174,10 @@ static int seize_cgroup_tree(char *root_path, const char *state)
if (!S_ISDIR(st.st_mode)) if (!S_ISDIR(st.st_mode))
continue; continue;
ret = seize_cgroup_tree(path, state);
if (seize_cgroup_tree(path, state) < 0) { if (ret < 0) {
closedir(dir); closedir(dir);
return -1; return ret;
} }
} }
closedir(dir); closedir(dir);
...@@ -325,7 +331,7 @@ static int freeze_processes(void) ...@@ -325,7 +331,7 @@ static int freeze_processes(void)
static const unsigned long step_ms = 100; static const unsigned long step_ms = 100;
unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms; unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms;
unsigned long i; unsigned long i = 0;
const struct timespec req = { const struct timespec req = {
.tv_nsec = step_ms * 1000000, .tv_nsec = step_ms * 1000000,
...@@ -371,7 +377,7 @@ static int freeze_processes(void) ...@@ -371,7 +377,7 @@ static int freeze_processes(void)
* not read @tasks pids while freezer in * not read @tasks pids while freezer in
* transition stage. * transition stage.
*/ */
for (i = 0; i <= nr_attempts; i++) { for (; i <= nr_attempts; i++) {
state = get_freezer_state(fd); state = get_freezer_state(fd);
if (!state) { if (!state) {
close(fd); close(fd);
...@@ -395,7 +401,18 @@ static int freeze_processes(void) ...@@ -395,7 +401,18 @@ static int freeze_processes(void)
pr_debug("freezing processes: %lu attempts done\n", i); pr_debug("freezing processes: %lu attempts done\n", i);
} }
exit_code = seize_cgroup_tree(opts.freeze_cgroup, state); /*
* Pay attention on @i variable -- it's continuation.
*/
for (; i <= nr_attempts; i++) {
exit_code = seize_cgroup_tree(opts.freeze_cgroup, state);
if (exit_code == -EAGAIN) {
if (alarm_timeouted())
goto err;
nanosleep(&req, NULL);
} else
break;
}
err: err:
if (exit_code == 0 || freezer_thawed) { if (exit_code == 0 || freezer_thawed) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment