Commit ef3ca3a1 authored by Andrey Vagin, committed by Pavel Emelyanov

restore: do not kill processes if not-all of them have been created

If processes are restored without a pidns, criu knows their pids from images,
but some of those tasks may not have been forked yet, and thus the pids may not
exist yet or (!) may be in use by other processes.

To address that, we abort the RESTORE_NS and FORKING stages without killing
tasks; instead we write CR_STATE_FAIL into the task_entries->start futex and
make the tasks check it. Since during the RESTORE_NS and FORKING stages tasks
can only block on the mentioned futex, we can safely do it.
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 0c5b606b
......@@ -1147,7 +1147,8 @@ static int restore_task_with_children(void *_arg)
if (mount_proc())
exit(1);
restore_finish_stage(CR_STATE_RESTORE_NS);
if (restore_finish_stage(CR_STATE_RESTORE_NS) < 0)
exit(1);
if (root_prepare_shared())
exit(1);
......@@ -1190,7 +1191,8 @@ static int restore_task_with_children(void *_arg)
if (current->pgid == current->pid.virt)
restore_pgid();
restore_finish_stage(CR_STATE_FORKING);
if (restore_finish_stage(CR_STATE_FORKING) < 0)
exit(1);
if (current->pgid != current->pid.virt)
restore_pgid();
......@@ -1206,6 +1208,8 @@ static int restore_task_with_children(void *_arg)
static inline int stage_participants(int next_stage)
{
switch (next_stage) {
case CR_STATE_FAIL:
return 0;
case CR_STATE_RESTORE_NS:
return 1;
case CR_STATE_FORKING:
......@@ -1316,21 +1320,21 @@ static int restore_root_task(struct pstree_item *init)
ret = restore_switch_stage(CR_STATE_RESTORE_PGID);
if (ret < 0)
goto out;
goto out_kill;
ret = restore_switch_stage(CR_STATE_RESTORE);
if (ret < 0)
goto out;
goto out_kill;
ret = restore_switch_stage(CR_STATE_RESTORE_SIGCHLD);
if (ret < 0)
goto out;
goto out_kill;
/* Restore SIGCHLD here to skip SIGCHLD from a network script */
ret = sigaction(SIGCHLD, &old_act, NULL);
if (ret < 0) {
pr_perror("sigaction() failed");
goto out;
goto out_kill;
}
/* Unlock network before disabling repair mode on sockets */
......@@ -1356,7 +1360,11 @@ static int restore_root_task(struct pstree_item *init)
return 0;
out:
out_kill:
/*
* The processes can be killed only when all of them have been created,
* otherwise external processes could be killed.
*/
if (current_ns_mask & CLONE_NEWPID) {
/* Kill init */
if (root_item->pid.real > 0)
......@@ -1369,6 +1377,8 @@ out:
kill(pi->pid.virt, SIGKILL);
}
out:
__restore_switch_stage(CR_STATE_FAIL);
pr_err("Restoring FAILED.\n");
return 1;
}
......
......@@ -180,7 +180,8 @@ struct shmems {
#define TASK_ENTRIES_SIZE 4096
enum {
CR_STATE_RESTORE_NS, /* is used for executing "setup-namespace" scripts */
CR_STATE_FAIL = -1,
CR_STATE_RESTORE_NS = 0, /* is used for executing "setup-namespace" scripts */
CR_STATE_FORKING,
CR_STATE_RESTORE_PGID,
CR_STATE_RESTORE,
......@@ -217,10 +218,11 @@ find_shmem(struct shmems *shmems, unsigned long shmid)
return NULL;
}
#define restore_finish_stage(__stage) do { \
#define restore_finish_stage(__stage) ({ \
futex_dec_and_wake(&task_entries->nr_in_progress); \
futex_wait_while(&task_entries->start, __stage); \
} while (0)
(s32) futex_get(&task_entries->start); \
})
/* the restorer_blob_offset__ prefix is added by gen_offsets.sh */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment