Commit 53957fad authored by Pavel Emelyanov

restore: Introduce the --restore-sibling option

We have a slight mess with how criu restores root task.
Right now we have the following options.

1) CLI
	a) Usually
	task calling criu
	 `- criu
	     `- root restored task

	b) when --restore-detached AND root has pdeath_sig

	task calling criu
	 `- criu
	 `- root restored task

2) Library/SWRK
	task using lib/swrk
	 `- criu
	 `- root restored task

3) Standalone service
	a) Usually
	service
	 `- service sub task
	     `- root restored task

	b) when root has pdeath_sig
	criu service
	 `- criu sub task
	 `- root restored task

It would be better if CRIU always restored the root task as a sibling,
but we have 3 constraints:

First, the case 1.a is kept for zdtm to run tests in pid namespaces
on 3.11, which in turn doesn't allow CLONE_PARENT | CLONE_NEWPID.

Second, CLI w/o --restore-detach waits for the restored task to die and
this behavior can be "expected" already.

Third, in case of standalone service tasks shouldn't become service's
children.

And I have one "plan". The p.haul project while live migrating tasks
on destination node starts a service, which uses library/swrk mode. In
this case the restored processes become p.haul service's kids which is
also not great.

That said, here's the option called --restore-sibling (referred to as
"child" in the cases below) that pairs with --restore-detach like this:

* detached AND child:

task
 `- criu restore (exits at the end)
 `- root task

The root task will become task's child.
This will be the default for library/swrk.
This is what LXC needs.

* detach AND !child

task
 `- criu restore (exits at the end)
     `- root task

The root task will get re-parented to init.
This will be compatible with 1.3.
This will be the default for the standalone service, and it is
what I want for the p.haul case.

* !detach AND child

task
 `- criu restore (waits for root task to die)
 `- root task

This should be deprecated, so that criu restore doesn't mess
 with task <-> root task signalling.

* !detach AND !child

task
 `- criu restore (waits for root task to die)
     `- root task

This is how plain criu restore works now.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Tycho Andersen <tycho.andersen@canonical.com>
Acked-by: Andrew Vagin <avagin@openvz.org>
parent 1ff2500b
...@@ -963,8 +963,7 @@ static void maybe_clone_parent(struct pstree_item *item, ...@@ -963,8 +963,7 @@ static void maybe_clone_parent(struct pstree_item *item,
* off of 3.11, this condition can be simplified to just test the * off of 3.11, this condition can be simplified to just test the
* options and not have the pdeath_sig test. * options and not have the pdeath_sig test.
*/ */
if (opts.swrk_restore || if (opts.restore_sibling) {
(opts.restore_detach && ca->core->thread_core->pdeath_sig)) {
/* /*
* This means we're called from lib's criu_restore_child(). * This means we're called from lib's criu_restore_child().
* In that case create the root task as the child one to+ * In that case create the root task as the child one to+
...@@ -985,6 +984,10 @@ static void maybe_clone_parent(struct pstree_item *item, ...@@ -985,6 +984,10 @@ static void maybe_clone_parent(struct pstree_item *item,
if (item->rst->clone_flags & CLONE_NEWPID) if (item->rst->clone_flags & CLONE_NEWPID)
pr_warn("Set CLONE_PARENT | CLONE_NEWPID but it might cause restore problem," pr_warn("Set CLONE_PARENT | CLONE_NEWPID but it might cause restore problem,"
"because not all kernels support such clone flags combinations!\n"); "because not all kernels support such clone flags combinations!\n");
} else if (opts.restore_detach) {
if (ca->core->thread_core->pdeath_sig)
pr_warn("Root task has pdeath_sig configured, so it will receive one _right_"
"after restore on CRIU exit\n");
} }
} }
......
...@@ -259,6 +259,15 @@ static int setup_opts_from_req(int sk, CriuOpts *req) ...@@ -259,6 +259,15 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
if (req->root) if (req->root)
opts.root = req->root; opts.root = req->root;
if (req->has_rst_sibling) {
if (!opts.swrk_restore) {
pr_err("rst_sibling is not allowed in standalone service\n");
return -1;
}
opts.restore_sibling = req->rst_sibling;
}
if (req->has_tcp_established) if (req->has_tcp_established)
opts.tcp_established_ok = req->tcp_established; opts.tcp_established_ok = req->tcp_established;
......
...@@ -133,13 +133,14 @@ int main(int argc, char *argv[], char *envp[]) ...@@ -133,13 +133,14 @@ int main(int argc, char *argv[], char *envp[])
int log_level = LOG_UNSET; int log_level = LOG_UNSET;
char *imgs_dir = "."; char *imgs_dir = ".";
char *work_dir = NULL; char *work_dir = NULL;
static const char short_opts[] = "dsRf:F:t:p:hcD:o:n:v::xVr:jlW:L:M:"; static const char short_opts[] = "dSsRf:F:t:p:hcD:o:n:v::xVr:jlW:L:M:";
static struct option long_opts[] = { static struct option long_opts[] = {
{ "tree", required_argument, 0, 't' }, { "tree", required_argument, 0, 't' },
{ "pid", required_argument, 0, 'p' }, { "pid", required_argument, 0, 'p' },
{ "leave-stopped", no_argument, 0, 's' }, { "leave-stopped", no_argument, 0, 's' },
{ "leave-running", no_argument, 0, 'R' }, { "leave-running", no_argument, 0, 'R' },
{ "restore-detached", no_argument, 0, 'd' }, { "restore-detached", no_argument, 0, 'd' },
{ "restore-sibling", no_argument, 0, 'S' },
{ "daemon", no_argument, 0, 'd' }, { "daemon", no_argument, 0, 'd' },
{ "contents", no_argument, 0, 'c' }, { "contents", no_argument, 0, 'c' },
{ "file", required_argument, 0, 'f' }, { "file", required_argument, 0, 'f' },
...@@ -247,6 +248,9 @@ int main(int argc, char *argv[], char *envp[]) ...@@ -247,6 +248,9 @@ int main(int argc, char *argv[], char *envp[])
case 'd': case 'd':
opts.restore_detach = true; opts.restore_detach = true;
break; break;
case 'S':
opts.restore_sibling = true;
break;
case 'D': case 'D':
imgs_dir = optarg; imgs_dir = optarg;
break; break;
...@@ -404,6 +408,11 @@ int main(int argc, char *argv[], char *envp[]) ...@@ -404,6 +408,11 @@ int main(int argc, char *argv[], char *envp[])
} }
} }
if (!opts.restore_detach && opts.restore_sibling) {
pr_msg("--restore-sibling only makes sense with --restore-detach\n");
return 1;
}
if (work_dir == NULL) if (work_dir == NULL)
work_dir = imgs_dir; work_dir = imgs_dir;
...@@ -540,6 +549,7 @@ usage: ...@@ -540,6 +549,7 @@ usage:
"* Generic:\n" "* Generic:\n"
" -t|--tree PID checkpoint a process tree identified by PID\n" " -t|--tree PID checkpoint a process tree identified by PID\n"
" -d|--restore-detached detach after restore\n" " -d|--restore-detached detach after restore\n"
" -S|--restore-sibling restore root task as sibling\n"
" -s|--leave-stopped leave tasks in stopped state after checkpoint\n" " -s|--leave-stopped leave tasks in stopped state after checkpoint\n"
" -R|--leave-running leave tasks in running state after checkpoint\n" " -R|--leave-running leave tasks in running state after checkpoint\n"
" -D|--images-dir DIR directory for image files\n" " -D|--images-dir DIR directory for image files\n"
......
...@@ -24,6 +24,7 @@ struct cr_options { ...@@ -24,6 +24,7 @@ struct cr_options {
bool check_ms_kernel; bool check_ms_kernel;
bool show_pages_content; bool show_pages_content;
bool restore_detach; bool restore_detach;
bool restore_sibling;
bool ext_unix_sk; bool ext_unix_sk;
bool shell_job; bool shell_job;
bool handle_file_locks; bool handle_file_locks;
......
...@@ -666,6 +666,9 @@ int criu_restore_child(void) ...@@ -666,6 +666,9 @@ int criu_restore_child(void)
req.type = CRIU_REQ_TYPE__RESTORE; req.type = CRIU_REQ_TYPE__RESTORE;
req.opts = opts; req.opts = opts;
req.opts->has_rst_sibling = true;
req.opts->rst_sibling = true;
ret = send_req_and_recv_resp_sk(sks[0], &req, &resp); ret = send_req_and_recv_resp_sk(sks[0], &req, &resp);
close(sks[0]); close(sks[0]);
......
...@@ -53,6 +53,8 @@ message criu_opts { ...@@ -53,6 +53,8 @@ message criu_opts {
repeated ext_mount_map ext_mnt = 23; repeated ext_mount_map ext_mnt = 23;
optional bool manage_cgroups = 24; optional bool manage_cgroups = 24;
repeated cgroup_root cg_root = 25; repeated cgroup_root cg_root = 25;
optional bool rst_sibling = 26; /* swrk only */
} }
message criu_dump_resp { message criu_dump_resp {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment