Commit 1dd20b08 authored by Andrey Vagin, committed by Cyrill Gorcunov

restore: detect that someone failed (v2)

Handle SIGCHLD: if any task fails during restore, the handler sets
nr_in_progress to -1. When crtools notices that nr_in_progress is
negative, it kills all tasks.

v2: * Use named constants for task_entries->start in restorer.c
    * Use SA_NOCLDWAIT when setting the SIGCHLD handler;
      this makes the SIGCHLD handler simpler.
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
parent c8edb0c7
......@@ -958,6 +958,7 @@ static int open_pipe(int pid, struct pipe_entry *e, int *pipes_fd)
return attach_pipe(pid, e, pi, pipes_fd);
}
/*
 * The SIGCHLD action assembled in prepare_sigactions(). It is stashed here
 * (the loop iteration is skipped with `continue`) and later copied into
 * task_args->sigchld_act by sigreturn_restore().
 */
static rt_sigaction_t sigchld_act;
static int prepare_sigactions(int pid)
{
rt_sigaction_t act, oact;
......@@ -985,6 +986,10 @@ static int prepare_sigactions(int pid)
ASSIGN_TYPED(act.rt_sa_restorer, e.restorer);
ASSIGN_TYPED(act.rt_sa_mask.sig[0], e.mask);
if (sig == SIGCHLD) {
sigchld_act = act;
continue;
}
/*
* A pure syscall is used, because glibc
* sigaction overwrites se_restorer.
......@@ -1141,6 +1146,20 @@ err:
return ret;
}
/*
 * SIGCHLD handler installed by restore_root_task().
 *
 * Logs how the child terminated and then sets task_entries->nr_in_progress
 * to -1. restore_root_task() treats a negative result from
 * cr_wait_until_greater() on that counter as "someone can't be restored"
 * and kills every task.
 *
 * @signal:  signal number (unused).
 * @siginfo: child status; si_pid, si_code and si_status are read.
 * @data:    ucontext pointer (unused).
 */
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
{
	/*
	 * si_code is an enumerated value, not a bitmask: testing it with '&'
	 * would report CLD_DUMPED/CLD_STOPPED as "exited" and CLD_CONTINUED
	 * as "killed". Compare by value instead.
	 */
	switch (siginfo->si_code) {
	case CLD_EXITED:
		pr_err("%d exited, status=%d\n",
		       siginfo->si_pid, siginfo->si_status);
		break;
	case CLD_KILLED:
	case CLD_DUMPED:
		/* For both codes si_status carries the terminating signal. */
		pr_err("%d killed by signal %d\n",
		       siginfo->si_pid, siginfo->si_status);
		break;
	default:
		break;
	}

	/* Any SIGCHLD during restore is treated as a failure. */
	cr_wait_set(&task_entries->nr_in_progress, -1);
}
static int restore_task_with_children(int my_pid)
{
int *pids;
......@@ -1151,6 +1170,7 @@ static int restore_task_with_children(int my_pid)
/* The block mask will be restored in sigreturn.
* This code should be removed when a freezer is added. */
sigfillset(&blockmask);
sigdelset(&blockmask, SIGCHLD);
ret = sigprocmask(SIG_BLOCK, &blockmask, NULL);
if (ret) {
pr_perror("%d: Can't block signals\n", my_pid);
......@@ -1219,6 +1239,7 @@ static int restore_root_task(int fd, bool detach)
{
struct pstree_entry e;
int ret, i;
struct sigaction act;
ret = read(fd, &e, sizeof(e));
if (ret != sizeof(e)) {
......@@ -1228,13 +1249,33 @@ static int restore_root_task(int fd, bool detach)
close(fd);
ret = sigaction(SIGCHLD, NULL, &act);
if (ret < 0) {
perror("sigaction() failed\n");
return -1;
}
act.sa_flags |= SA_NOCLDWAIT | SA_NOCLDSTOP | SA_SIGINFO | SA_RESTART;
act.sa_sigaction = sigchld_handler;
ret = sigaction(SIGCHLD, &act, NULL);
if (ret < 0) {
perror("sigaction() failed\n");
return -1;
}
pr_info("Forking root with %d pid\n", e.pid);
ret = fork_with_pid(e.pid);
if (ret < 0)
return -1;
pr_info("Wait until all tasks are restored");
cr_wait_until(&task_entries->nr_in_progress, 0);
ret = cr_wait_until_greater(&task_entries->nr_in_progress, 0);
if (ret < 0) {
pr_err("Someone can't be restored\n");
for (i = 0; i < task_entries->nr; i++)
kill(task_entries->entries[i].pid, SIGKILL);
return 1;
}
for (i = 0; i < task_entries->nr; i++) {
pr_info("Wait while the task %d restored\n",
......@@ -1242,6 +1283,9 @@ static int restore_root_task(int fd, bool detach)
cr_wait_while(&task_entries->entries[i].done, 0);
}
cr_wait_set(&task_entries->nr_in_progress, task_entries->nr);
cr_wait_set(&task_entries->start, CR_STATE_RESTORE_SIGCHLD);
cr_wait_until(&task_entries->nr_in_progress, 0);
pr_info("Go on!!!\n");
cr_wait_set(&task_entries->start, CR_STATE_COMPLETE);
......@@ -1537,6 +1581,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
task_args->fd_core = fd_core;
task_args->fd_self_vmas = fd_self_vmas;
task_args->logfd = get_logfd();
task_args->sigchld_act = sigchld_act;
cr_mutex_init(&task_args->rst_lock);
......
......@@ -77,6 +77,7 @@ struct task_restore_core_args {
struct thread_restore_args *thread_args; /* array of thread arguments */
struct shmems *shmems;
struct task_entries *task_entries;
rt_sigaction_t sigchld_act;
} __aligned(sizeof(long));
struct pt_regs {
......@@ -195,6 +196,7 @@ struct shmems {
/*
 * Values of task_entries->start, used to step the restored tasks and
 * restore_root_task() through the final stages together.
 */
enum {
	/* Restored tasks wait while start holds this value. */
	CR_STATE_RESTORE		= 0,
	/*
	 * Set by restore_root_task(); each task then installs its saved
	 * SIGCHLD action and waits while start holds this value.
	 */
	CR_STATE_RESTORE_SIGCHLD	= 1,
	/* Set by restore_root_task() once nr_in_progress has reached 0. */
	CR_STATE_COMPLETE		= 2
};
......
......@@ -152,8 +152,6 @@ static always_inline long sys_open(const char *filename, unsigned long flags, un
return syscall3(__NR_open, (unsigned long)filename, flags, mode);
}
struct sigaction;
static always_inline long sys_sigaction(int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact)
{
return syscall4(__NR_rt_sigaction, signum, (unsigned long)act, (unsigned long)oldact, sizeof(rt_sigset_t));
......
......@@ -73,7 +73,8 @@ typedef struct {
unsigned long sig[1];
} rt_sigset_t;
typedef void rt_signalfn_t(int);
struct siginfo;
typedef void rt_signalfn_t(int, struct siginfo *, void *);
typedef rt_signalfn_t *rt_sighandler_t;
typedef void rt_restorefn_t(void);
......
......@@ -5,6 +5,7 @@
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
......@@ -20,6 +21,25 @@
#include "lock.h"
#include "restorer.h"
/*
 * Assigned from args->task_entries in restore_task() so that
 * sigchld_handler() can reach nr_in_progress.
 */
static struct task_entries *task_entries;
/*
 * SIGCHLD handler used inside the restorer (installed by restore_task()).
 *
 * Writes the child's pid, how it terminated and its status to the log,
 * sets task_entries->nr_in_progress to -1 to flag the failure, then stops
 * and exits the current task instead of returning from the handler.
 *
 * @signal:  signal number (unused).
 * @siginfo: child status; si_pid, si_code and si_status are read.
 * @data:    ucontext pointer (unused).
 */
static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
{
	write_num(siginfo->si_pid);

	/*
	 * si_code is an enumerated value, not a bitmask: testing it with '&'
	 * would report CLD_DUMPED/CLD_STOPPED as "exited" and CLD_CONTINUED
	 * as "killed". Compare by value instead.
	 */
	switch (siginfo->si_code) {
	case CLD_EXITED:
		write_string(" exited, status=");
		break;
	case CLD_KILLED:
	case CLD_DUMPED:
		/* For both codes si_status carries the terminating signal. */
		write_string(" killed by signal ");
		break;
	default:
		break;
	}
	write_num_n(siginfo->si_status);

	cr_wait_set(&task_entries->nr_in_progress, -1);

	/* sa_restorer may be unmapped, so we can't go back to userspace */
	sys_kill(sys_getpid(), SIGSTOP);
	sys_exit(1);
}
/*
* Threads restoration via sigreturn. Note it's locked
* routine and calls for unlock at the end.
......@@ -131,6 +151,12 @@ long restore_task(struct task_restore_core_args *args)
struct rt_sigframe *rt_sigframe;
unsigned long new_sp, fsgs_base;
pid_t my_pid = sys_getpid();
rt_sigaction_t act;
task_entries = args->task_entries;
sys_sigaction(SIGCHLD, NULL, &act);
act.rt_sa_handler = sigchld_handler;
sys_sigaction(SIGCHLD, &act, NULL);
set_logfd(args->logfd);
......@@ -505,12 +531,20 @@ long restore_task(struct task_restore_core_args *args)
}
task_entry = task_get_entry(args->task_entries, my_pid);
cr_wait_dec(&args->task_entries->nr_in_progress);
cr_wait_set(&task_entry->done, 1);
write_num(sys_getpid());
write_string_n(": Restored");
cr_wait_while(&args->task_entries->start, CR_STATE_RESTORE);
sys_sigaction(SIGCHLD, &args->sigchld_act, NULL);
cr_wait_dec(&args->task_entries->nr_in_progress);
cr_wait_while(&args->task_entries->start, CR_STATE_RESTORE_SIGCHLD);
ret = sys_munmap(args->task_entries, TASK_ENTRIES_SIZE);
if (ret < 0) {
write_num_n(__LINE__);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment