Commit d0ff7307 authored by Andrey Ryabinin, committed by Pavel Emelyanov

dump: add timeout for collecting processes

Currently, criu dump may hang indefinitely, e.g. while waiting for a
task that is blocked in vfork(), or for a task that is stuck in D state
for some other reason. This patch adds a time limit on collecting tasks
during the dump operation. If collecting processes takes too long, the
dump process is terminated. The timeout is 5 seconds by default, but
it can be changed via the --timeout option.
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 9bfd62c5
......@@ -1384,6 +1384,25 @@ err_cure_imgset:
goto err;
}
typedef void (*sa_handler_t)(int);
static int setup_alarm_handler(sa_handler_t handler)
{
struct sigaction sa = {
.sa_handler = handler,
.sa_flags = 0,
};
sigemptyset(&sa.sa_mask);
sigaddset(&sa.sa_mask, SIGALRM);
if (sigaction(SIGALRM, &sa, NULL)) {
pr_perror("Unable to setup SIGALRM handler");
return -1;
}
return 0;
}
static int cr_pre_dump_finish(struct list_head *ctls, int ret)
{
struct parasite_ctl *ctl, *n;
......@@ -1436,6 +1455,15 @@ static int cr_pre_dump_finish(struct list_head *ctls, int ret)
return ret;
}
void pre_dump_alarm_handler(int signum)
{
LIST_HEAD(empty_list);
pr_err("Timeout reached\n");
cr_pre_dump_finish(&empty_list, -1);
exit(-1);
}
int cr_pre_dump_tasks(pid_t pid)
{
struct pstree_item *item;
......@@ -1470,6 +1498,9 @@ int cr_pre_dump_tasks(pid_t pid)
if (connect_to_page_server())
goto err;
if (setup_alarm_handler(pre_dump_alarm_handler))
goto err;
if (collect_pstree(pid))
goto err;
......@@ -1570,6 +1601,13 @@ static int cr_dump_finish(int ret)
return post_dump_ret ? : (ret != 0);
}
/*
 * SIGALRM handler for the dump path.
 *
 * Logs the timeout, calls cr_dump_finish() with a failure code of -1,
 * then terminates the process with exit(-1).
 *
 * NOTE(review): exit() is not on the POSIX async-signal-safe list, and
 * pr_err()/cr_dump_finish() presumably are not either -- confirm that
 * calling them from signal context is acceptable here.
 */
void dump_alarm_handler(int signum)
{
	(void)signum; /* the signal number is not needed */

	pr_err("Timeout reached\n");
	cr_dump_finish(-1);
	exit(-1);
}
int cr_dump_tasks(pid_t pid)
{
struct pstree_item *item;
......@@ -1617,6 +1655,9 @@ int cr_dump_tasks(pid_t pid)
if (connect_to_page_server())
goto err;
if (setup_alarm_handler(dump_alarm_handler))
goto err;
/*
* The collect_pstree will also stop (PTRACE_SEIZE) the tasks
* thus ensuring that they don't modify anything we collect
......
......@@ -65,6 +65,7 @@ void init_opts(void)
opts.manage_cgroups = CG_MODE_DEFAULT;
opts.ps_socket = -1;
opts.ghost_limit = DEFAULT_GHOST_LIMIT;
opts.timeout = DEFAULT_TIMEOUT;
}
static int parse_ns_string(const char *ptr)
......@@ -255,6 +256,7 @@ int main(int argc, char *argv[], char *envp[])
{ "ghost-limit", required_argument, 0, 1069 },
{ "irmap-scan-path", required_argument, 0, 1070 },
{ "lsm-profile", required_argument, 0, 1071 },
{ "timeout", required_argument, 0, 1072 },
{ },
};
......@@ -503,6 +505,8 @@ int main(int argc, char *argv[], char *envp[])
case 1071:
	/* --lsm-profile */
	if (parse_lsm_arg(optarg) < 0)
		return -1;
	/*
	 * Must not fall through: the next case would overwrite
	 * opts.timeout with atoi() of the LSM profile argument.
	 */
	break;
case 1072:
	/* --timeout: parsed with atoi() and stored in opts.timeout. */
	opts.timeout = atoi(optarg);
	break;
case 'M':
{
......
......@@ -38,6 +38,8 @@ struct cg_root_opt {
*/
#define DEFAULT_GHOST_LIMIT (1 << 20)
#define DEFAULT_TIMEOUT 5
struct irmap;
struct irmap_path_opt {
......@@ -97,6 +99,7 @@ struct cr_options {
struct list_head irmap_scan_paths;
bool lsm_supplied;
char *lsm_profile;
unsigned int timeout;
};
extern struct cr_options opts;
......
......@@ -640,6 +640,13 @@ int collect_pstree(pid_t pid)
goto err;
}
/*
* wait4() may hang for some reason. Enable timer and fire SIGALRM
* if timeout reached. SIGALRM handler will do the necessary
* cleanups and terminate current process.
*/
alarm(opts.timeout);
ret = seize_wait_task(pid, -1, &dmpi(root_item)->pi_creds);
if (ret < 0)
goto err;
......@@ -658,6 +665,8 @@ int collect_pstree(pid_t pid)
timing_start(TIME_FROZEN);
err:
/* Freezing stage finished in time - disable timer. */
alarm(0);
return ret;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment