Commit cc2eb88c authored by Tycho Andersen, committed by Pavel Emelyanov

dump: when freezing fails, log unfrozen task stacks

When freezing, we can sometimes fail due to a timeout. When that happens, the
cause may be a kernel bug where tasks are deadlocked. It would be useful to
log the kernel stacks to see where the tasks are hung, if they are.

Note that this is racy: a task might still freeze while we are logging the
various stacks, so we might miss things. But in the case of a deadlock, it
should catch what's wrong.

v2: log all stacks instead of just the unfrozen tasks
Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent fd61aff7
......@@ -243,6 +243,78 @@ static int freezer_detach(void)
return 0;
}
/*
 * Log (at debug level) the contents of the "stack" proc entry of every task
 * listed in <root>/tasks, then recurse into every subdirectory of root and
 * do the same there.
 *
 * root: path of a directory containing a "tasks" file with one pid per line
 *       (the caller passes the freezer cgroup path).
 *
 * Returns 0 on success, -1 as soon as any file or directory cannot be
 * opened, read or stat'ed. All handles opened here are released on every
 * path.
 */
static int log_unfrozen_stacks(char *root)
{
	DIR *dir;
	struct dirent *de;
	char path[PATH_MAX];
	FILE *f;

	snprintf(path, sizeof(path), "%s/tasks", root);
	f = fopen(path, "r");
	if (f == NULL) {
		pr_perror("Unable to open %s", path);
		return -1;
	}

	/* path is reused as the line buffer: each line holds one pid. */
	while (fgets(path, sizeof(path), f)) {
		pid_t pid;
		int stack;
		ssize_t ret;
		char stackbuf[2048];

		pid = atoi(path);

		/* NOTE(review): presumably opens /proc/<pid>/stack -- confirm against open_proc(). */
		stack = open_proc(pid, "stack");
		if (stack < 0) {
			pr_perror("couldn't log %d's stack", pid);
			fclose(f);
			return -1;
		}

		/* Leave one byte free so the terminating NUL always fits. */
		ret = read(stack, stackbuf, sizeof(stackbuf) - 1);
		if (ret < 0) {
			/* Report before close() so errno still belongs to read(). */
			pr_perror("couldn't read %d's stack", pid);
			close(stack);
			fclose(f);
			return -1;
		}
		close(stack);
		stackbuf[ret] = '\0';

		pr_debug("Task %d has stack:\n%s", pid, stackbuf);
	}

	/* Done with the tasks file before descending, to keep fd usage low. */
	fclose(f);

	dir = opendir(root);
	if (!dir) {
		pr_perror("Unable to open %s", root);
		return -1;
	}

	while ((de = readdir(dir))) {
		struct stat st;

		/* NOTE(review): dir_dots() presumably matches "." and ".." -- confirm. */
		if (dir_dots(de))
			continue;

		/*
		 * Bounded formatting: a too-long path is truncated rather than
		 * overflowing the buffer; the recursive call will then fail to
		 * open it and report the error.
		 */
		snprintf(path, sizeof(path), "%s/%s", root, de->d_name);

		if (fstatat(dirfd(dir), de->d_name, &st, 0) < 0) {
			pr_perror("stat of %s failed", path);
			closedir(dir);
			return -1;
		}

		/* Only subdirectories are descended into. */
		if (!S_ISDIR(st.st_mode))
			continue;

		if (log_unfrozen_stacks(path) < 0) {
			closedir(dir);
			return -1;
		}
	}

	closedir(dir);
	return 0;
}
static int freeze_processes(void)
{
int i, fd, exit_code = -1;
......@@ -310,6 +382,10 @@ static int freeze_processes(void)
if (i > NR_ATTEMPTS) {
pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup);
if (!pr_quelled(LOG_DEBUG))
log_unfrozen_stacks(opts.freeze_cgroup);
goto err;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment