Commit 994ae676, authored by Cyrill Gorcunov, committed by Pavel Emelyanov

restore: Set CLONE_PARENT iff pdeath_sig is present, v4

It's been discovered that on 3.11 restore might fail
if we pass the @CLONE_PARENT flag into the clone() call, due to kernel
limitations.

Because we're treating 3.11 as the base working kernel, let's
do a trick instead:

 - set up this flag iff pdeath_sig is present
 - if CLONE_NEWPID is passed, warn the user about
   potential consequences.
 - because we need to carry the condition into the attach_to_tasks
   call, introduce the @root_as_sibling variable for this.

CC: Tycho Andersen <tycho.andersen@canonical.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 47fae013
...@@ -93,6 +93,8 @@ static int prepare_rlimits(int pid, CoreEntry *core); ...@@ -93,6 +93,8 @@ static int prepare_rlimits(int pid, CoreEntry *core);
static int prepare_posix_timers(int pid, CoreEntry *core); static int prepare_posix_timers(int pid, CoreEntry *core);
static int prepare_signals(int pid); static int prepare_signals(int pid);
static int root_as_sibling;
static int shmem_remap(void *old_addr, void *new_addr, unsigned long size) static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
{ {
void *ret; void *ret;
...@@ -922,6 +924,34 @@ struct cr_clone_arg { ...@@ -922,6 +924,34 @@ struct cr_clone_arg {
CoreEntry *core; CoreEntry *core;
}; };
/*
 * maybe_clone_parent - decide whether @item has to be cloned with
 * CLONE_PARENT and remember that decision in root_as_sibling.
 *
 * @item: task whose clone flags may be extended.
 * @ca:   clone arguments; the pdeath_sig of the task is read from
 *        ca->core->thread_core.
 *
 * NOTE(review): ca->core and ca->core->thread_core are dereferenced
 * without a NULL check when opts.swrk_restore is not set -- confirm
 * that the caller always provides them.
 */
static void maybe_clone_parent(struct pstree_item *item,
			       struct cr_clone_arg *ca)
{
	if (opts.swrk_restore ||
	    (opts.restore_detach && ca->core->thread_core->pdeath_sig)) {
		/*
		 * This means we're called from lib's criu_restore_child().
		 * In that case create the root task as a child of the
		 * caller. This is the only way to correctly restore the
		 * pdeath_sig of the root task. But it also looks nice.
		 *
		 * Alternatively, if we are --restore-detached, a similar
		 * trick is needed to correctly restore pdeath_sig and
		 * prevent processes from dying once restored.
		 *
		 * There was a problem in kernel 3.11 -- CLONE_PARENT can't
		 * be set together with CLONE_NEWPID. It has been solved in
		 * later kernel versions, but we treat 3.11 as the base, so
		 * at least warn the user about potential problems.
		 */
		item->rst->clone_flags |= CLONE_PARENT;
		root_as_sibling = 1;

		/* The literals are concatenated: keep the space after the comma. */
		if (item->rst->clone_flags & CLONE_NEWPID)
			pr_warn("Set CLONE_PARENT | CLONE_NEWPID but it might cause restore problem, "
				"because not all kernels support such clone flags combinations!\n");
	}
}
static inline int fork_with_pid(struct pstree_item *item) static inline int fork_with_pid(struct pstree_item *item)
{ {
int ret = -1, fd; int ret = -1, fd;
...@@ -951,6 +981,9 @@ static inline int fork_with_pid(struct pstree_item *item) ...@@ -951,6 +981,9 @@ static inline int fork_with_pid(struct pstree_item *item)
pr_err("Unknown task state %d\n", item->state); pr_err("Unknown task state %d\n", item->state);
return -1; return -1;
} }
if (unlikely(item == root_item))
maybe_clone_parent(item, &ca);
} else { } else {
/* /*
* Helper entry will not get moved around and thus * Helper entry will not get moved around and thus
...@@ -1107,7 +1140,7 @@ static int criu_signals_setup(void) ...@@ -1107,7 +1140,7 @@ static int criu_signals_setup(void)
} }
act.sa_flags |= SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART; act.sa_flags |= SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
if (opts.swrk_restore || opts.restore_detach) if (root_as_sibling)
/* /*
* Root task will be our sibling. This means, that * Root task will be our sibling. This means, that
* we will not notice when (if) it dies in SIGCHLD * we will not notice when (if) it dies in SIGCHLD
...@@ -1594,24 +1627,11 @@ static int restore_root_task(struct pstree_item *init) ...@@ -1594,24 +1627,11 @@ static int restore_root_task(struct pstree_item *init)
futex_set(&task_entries->nr_in_progress, futex_set(&task_entries->nr_in_progress,
stage_participants(CR_STATE_RESTORE_NS)); stage_participants(CR_STATE_RESTORE_NS));
/*
* This means we're called from lib's criu_restore_child().
* In that case create the root task as the child one to+
* the caller. This is the only way to correctly restore the
* pdeath_sig of the root task. But also looks nice.
*
* Alternatively, if we are --restore-detached, a similar trick is
* needed to correctly restore pdeath_sig and prevent processes from
* dying once restored.
*/
if (opts.swrk_restore || opts.restore_detach)
init->rst->clone_flags |= CLONE_PARENT;
ret = fork_with_pid(init); ret = fork_with_pid(init);
if (ret < 0) if (ret < 0)
return -1; return -1;
if (opts.swrk_restore || opts.restore_detach) { if (root_as_sibling) {
if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) { if (ptrace(PTRACE_SEIZE, init->pid.real, 0, 0)) {
pr_perror("Can't attach to init"); pr_perror("Can't attach to init");
goto out; goto out;
...@@ -1670,7 +1690,7 @@ static int restore_root_task(struct pstree_item *init) ...@@ -1670,7 +1690,7 @@ static int restore_root_task(struct pstree_item *init)
timing_stop(TIME_RESTORE); timing_stop(TIME_RESTORE);
ret = attach_to_tasks(opts.swrk_restore); ret = attach_to_tasks(root_as_sibling);
pr_info("Restore finished successfully. Resuming tasks.\n"); pr_info("Restore finished successfully. Resuming tasks.\n");
futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE); futex_set_and_wake(&task_entries->start, CR_STATE_COMPLETE);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment