Commit 8ea02038 authored by Andrey Vagin's avatar Andrey Vagin Committed by Pavel Emelyanov

dump: use freezer cgroup to seize processes (v4)

Without using a freezer cgroup, we need to do a few iterations to catch
all tasks, because new tasks can be born in the meantime. If new tasks
appear faster than criu collects them, criu fails. The freezer cgroup
allows us to solve this problem.

We freeze the freezer cgroup, then attach to the tasks with ptrace and
thaw the freezer cgroup. We assume that all tasks which are going to be
dumped are in the specified freezer cgroup.

v2: fix comments from Christopher
Reviewed-by: Christopher Covington <cov@codeaurora.org>

v3: refactor task_seize

v4: fix comments from Pavel

Cc: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 69b093ad
...@@ -235,6 +235,7 @@ int main(int argc, char *argv[], char *envp[]) ...@@ -235,6 +235,7 @@ int main(int argc, char *argv[], char *envp[])
{ "enable-fs", required_argument, 0, 1065 }, { "enable-fs", required_argument, 0, 1065 },
{ "enable-external-sharing", no_argument, 0, 1066 }, { "enable-external-sharing", no_argument, 0, 1066 },
{ "enable-external-masters", no_argument, 0, 1067 }, { "enable-external-masters", no_argument, 0, 1067 },
{ "freeze-cgroup", required_argument, 0, 1068 },
{ }, { },
}; };
...@@ -465,6 +466,9 @@ int main(int argc, char *argv[], char *envp[]) ...@@ -465,6 +466,9 @@ int main(int argc, char *argv[], char *envp[])
case 1067: case 1067:
opts.enable_external_masters = true; opts.enable_external_masters = true;
break; break;
case 1068:
opts.freeze_cgroup = optarg;
break;
case 'M': case 'M':
{ {
char *aux; char *aux;
...@@ -676,6 +680,8 @@ usage: ...@@ -676,6 +680,8 @@ usage:
" 'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n" " 'cpu','fpu','all','ins','none'. To disable capability, prefix it with '^'.\n"
" --exec-cmd execute the command specified after '--' on successful\n" " --exec-cmd execute the command specified after '--' on successful\n"
" restore making it the parent of the restored process\n" " restore making it the parent of the restored process\n"
" --freeze-cgroup\n"
" use cgroup freezer to collect processes\n"
"\n" "\n"
"* Special resources support:\n" "* Special resources support:\n"
" -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n" " -x|--" USK_EXT_PARAM "inode,.." " allow external unix connections (optionally can be assign socket's inode that allows one-sided dump)\n"
......
...@@ -57,6 +57,7 @@ struct cr_options { ...@@ -57,6 +57,7 @@ struct cr_options {
char *output; char *output;
char *root; char *root;
char *pidfile; char *pidfile;
char *freeze_cgroup;
struct list_head veth_pairs; struct list_head veth_pairs;
struct list_head scripts; struct list_head scripts;
struct list_head ext_mounts; struct list_head ext_mounts;
......
...@@ -19,6 +19,180 @@ ...@@ -19,6 +19,180 @@
#define NR_ATTEMPTS 5 #define NR_ATTEMPTS 5
/*
 * Freezer cgroup state names. The contents of freezer.state are compared
 * against all three; frozen and thawed are also written back to that file.
 * get_freezer_state() returns pointers to these arrays, so its callers
 * compare states by pointer rather than with strcmp().
 */
const char frozen[] = "FROZEN";
const char freezing[] = "FREEZING";
const char thawed[] = "THAWED";
/*
 * Read the current state from a freezer cgroup's state file.
 *
 * @fd: open descriptor of the freezer.state file. It is rewound to
 *      offset 0 before every read, so the same descriptor can be polled
 *      repeatedly.
 *
 * Returns one of the file-scope arrays frozen, freezing or thawed (the
 * result can therefore be compared by pointer), or NULL when the file
 * cannot be read or contains an unrecognized state.
 */
static const char *get_freezer_state(int fd)
{
	int ret;
	char state[PATH_MAX];

	lseek(fd, 0, SEEK_SET);
	ret = read(fd, state, sizeof(state) - 1);
	if (ret <= 0) {
		pr_perror("Unable to get a current state");
		goto err;
	}

	/* Terminate the string, dropping a trailing newline if there is one. */
	if (state[ret - 1] == '\n')
		state[ret - 1] = 0;
	else
		state[ret] = 0;

	pr_debug("freezer.state=%s\n", state);
	if (strcmp(state, frozen) == 0)
		return frozen;
	if (strcmp(state, freezing) == 0)
		return freezing;
	if (strcmp(state, thawed) == 0)
		return thawed;

	/* Terminate the line, as the other pr_err()/pr_debug() calls do. */
	pr_err("Unknown freezer state: %s\n", state);
err:
	return NULL;
}
/*
 * Set by freeze_processes() when it finds the cgroup in the THAWED state,
 * just before it tries to freeze it. While set, freezer_restore_state()
 * does nothing, and the exit path of freeze_processes() thaws the cgroup
 * even on failure.
 */
static bool freezer_thawed;
/*
 * Write "FROZEN" back into the cgroup's freezer.state file.
 *
 * Does nothing when no freezer cgroup was specified, or when
 * freezer_thawed is set.
 *
 * Returns 0 on success (or when there is nothing to do), -1 if the state
 * file cannot be opened or written.
 */
static int freezer_restore_state(void)
{
	char state_path[PATH_MAX];
	int state_fd;
	int rc = 0;

	if (opts.freeze_cgroup == NULL)
		return 0;
	if (freezer_thawed)
		return 0;

	snprintf(state_path, sizeof(state_path), "%s/freezer.state", opts.freeze_cgroup);

	state_fd = open(state_path, O_RDWR);
	if (state_fd < 0) {
		pr_perror("Unable to open %s", state_path);
		return -1;
	}

	/* Report the failure before close() gets a chance to touch errno. */
	if (write(state_fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
		pr_perror("Unable to freeze tasks");
		rc = -1;
	}

	close(state_fd);
	return rc;
}
/*
 * Freeze the cgroup named by opts.freeze_cgroup and catch every task
 * listed in it with seize_catch_task().
 *
 * If the cgroup is THAWED on entry, FROZEN is written to freezer.state and
 * freezer_thawed is set. The tasks file is then read repeatedly, catching
 * tasks that are not caught yet, until the state reads FROZEN and one more
 * full pass over the task list has been made. On the way out the cgroup is
 * thawed again, either on success or whenever freezer_thawed is set.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int freeze_processes(void)
{
	int i, ret, fd, exit_code = -1;
	char path[PATH_MAX];
	const char *state = thawed;
	FILE *f;

	snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup);
	fd = open(path, O_RDWR);
	if (fd < 0) {
		pr_perror("Unable to open %s", path);
		return -1;
	}

	state = get_freezer_state(fd);
	if (!state) {
		close(fd);
		return -1;
	}

	if (state == thawed) {
		freezer_thawed = true;

		lseek(fd, 0, SEEK_SET);
		if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) {
			pr_perror("Unable to freeze tasks");
			close(fd);
			return -1;
		}
	}

	/*
	 * There is no way to wait for a specified state, so we need to poll
	 * freezer.state.
	 * There is one extra attempt to check that everything is frozen.
	 */
	for (i = 0; i <= NR_ATTEMPTS; i++) {
		struct timespec req = {};
		u64 timeout;

		/*
		 * New tasks can appear while the freezer state isn't
		 * frozen, so we need to catch all new tasks.
		 */
		snprintf(path, sizeof(path), "%s/tasks", opts.freeze_cgroup);
		f = fopen(path, "r");
		if (f == NULL) {
			pr_perror("Unable to open %s", path);
			goto err;
		}

		/* From here on 'path' is reused as the line buffer. */
		while (fgets(path, sizeof(path), f)) {
			pid_t pid;

			pid = atoi(path);

			/*
			 * Here we are going to skip tasks which are already
			 * traced. Ptraced tasks look like children to us, so
			 * if a task isn't ptraced yet, waitpid() will return
			 * an error.
			 */
			ret = wait4(pid, NULL, __WALL | WNOHANG, NULL);
			if (ret == 0)
				continue;

			/*
			 * seize_catch_task() fails when it meets a zombie.
			 * While the cgroup is not FROZEN yet the failure is
			 * ignored and the task list is simply read again on
			 * the next pass; once it is FROZEN the failure is
			 * fatal. The stream is closed only on the fatal path,
			 * so it is never read from, or closed again, after
			 * fclose().
			 */
			if (seize_catch_task(pid) && state == frozen) {
				fclose(f);
				goto err;
			}
		}
		fclose(f);

		/* The list was walked with the cgroup already frozen: done. */
		if (state == frozen)
			break;

		state = get_freezer_state(fd);
		if (!state)
			goto err;

		if (state == frozen) {
			/*
			 * Enumerate all tasks one more time to collect all new
			 * tasks, which can be born while the cgroup is being
			 * frozen.
			 */
			continue;
		}

		/* Back off linearly: 0ms, 10ms, 20ms, ... between polls. */
		timeout = 10000000 * i;
		req.tv_nsec = timeout % 1000000000;
		req.tv_sec = timeout / 1000000000;
		nanosleep(&req, NULL);
	}

	if (i > NR_ATTEMPTS) {
		pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
		goto err;
	}

	exit_code = 0;
err:
	/*
	 * Thaw on success, and on failure too if freezer_thawed is set.
	 */
	if (exit_code == 0 || freezer_thawed) {
		lseek(fd, 0, SEEK_SET);
		if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) {
			pr_perror("Unable to thaw tasks");
			exit_code = -1;
		}
	}
	if (close(fd)) {
		pr_perror("Unable to thaw tasks");
		return -1;
	}

	return exit_code;
}
static inline bool child_collected(struct pstree_item *i, pid_t pid) static inline bool child_collected(struct pstree_item *i, pid_t pid)
{ {
struct pstree_item *c; struct pstree_item *c;
...@@ -59,8 +233,9 @@ static int collect_children(struct pstree_item *item) ...@@ -59,8 +233,9 @@ static int collect_children(struct pstree_item *item)
goto free; goto free;
} }
/* fails when meets a zombie */ if (!opts.freeze_cgroup)
seize_catch_task(pid); /* fails when meets a zombie */
seize_catch_task(pid);
ret = seize_wait_task(pid, item->pid.real, &dmpi(c)->pi_creds); ret = seize_wait_task(pid, item->pid.real, &dmpi(c)->pi_creds);
if (ret < 0) { if (ret < 0) {
...@@ -147,6 +322,9 @@ void pstree_switch_state(struct pstree_item *root_item, int st) ...@@ -147,6 +322,9 @@ void pstree_switch_state(struct pstree_item *root_item, int st)
{ {
struct pstree_item *item = root_item; struct pstree_item *item = root_item;
if (st != TASK_DEAD)
freezer_restore_state();
pr_info("Unfreezing tasks into %d\n", st); pr_info("Unfreezing tasks into %d\n", st);
for_each_pstree_item(item) for_each_pstree_item(item)
unseize_task_and_threads(item, st); unseize_task_and_threads(item, st);
...@@ -211,7 +389,7 @@ static int collect_threads(struct pstree_item *item) ...@@ -211,7 +389,7 @@ static int collect_threads(struct pstree_item *item)
pr_info("\tSeizing %d's %d thread\n", pr_info("\tSeizing %d's %d thread\n",
item->pid.real, pid); item->pid.real, pid);
if (seize_catch_task(pid)) if (!opts.freeze_cgroup && seize_catch_task(pid))
continue; continue;
ret = seize_wait_task(pid, item_ppid(item), &dmpi(item)->pi_creds); ret = seize_wait_task(pid, item_ppid(item), &dmpi(item)->pi_creds);
...@@ -258,6 +436,9 @@ static int collect_loop(struct pstree_item *item, ...@@ -258,6 +436,9 @@ static int collect_loop(struct pstree_item *item,
{ {
int attempts = NR_ATTEMPTS, nr_inprogress = 1; int attempts = NR_ATTEMPTS, nr_inprogress = 1;
if (opts.freeze_cgroup)
attempts = 1;
/* /*
* While we scan the proc and seize the children/threads * While we scan the proc and seize the children/threads
* new ones can appear (with clone(CLONE_PARENT) or with * new ones can appear (with clone(CLONE_PARENT) or with
...@@ -268,7 +449,7 @@ static int collect_loop(struct pstree_item *item, ...@@ -268,7 +449,7 @@ static int collect_loop(struct pstree_item *item,
* appear. * appear.
*/ */
while (nr_inprogress > 0 && attempts) { while (nr_inprogress > 0 && attempts >= 0) {
attempts--; attempts--;
nr_inprogress = collect(item); nr_inprogress = collect(item);
} }
...@@ -318,13 +499,16 @@ int collect_pstree(pid_t pid) ...@@ -318,13 +499,16 @@ int collect_pstree(pid_t pid)
timing_start(TIME_FREEZING); timing_start(TIME_FREEZING);
if (opts.freeze_cgroup && freeze_processes())
return -1;
root_item = alloc_pstree_item(); root_item = alloc_pstree_item();
if (root_item == NULL) if (root_item == NULL)
return -1; return -1;
root_item->pid.real = pid; root_item->pid.real = pid;
if (seize_catch_task(pid)) { if (!opts.freeze_cgroup && seize_catch_task(pid)) {
set_cr_errno(ESRCH); set_cr_errno(ESRCH);
goto err; goto err;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment