Commit 51876eea, authored by Tycho Andersen, committed by Pavel Emelyanov

Attempt to restore cgroups

During the dump phase, /proc/cgroups is parsed to find co-mounted cgroups.
Then, for each task /proc/self/cgroup is parsed for the cgroups that it is a
member of, and that cgroup is traversed to find any child cgroups which may
also need restoring. Any cgroups not currently mounted will be temporarily
mounted and traversed. All of this information is persisted along with the
original cg_sets, which indicate which cgroups a task is a member of.

On restore, an initial phase creates all the cgroups which were saved. Tasks
are then restored into these cgroups via cg_sets as usual.
Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent e1b56c8f
This diff is collapsed.
...@@ -1776,6 +1776,9 @@ int cr_dump_tasks(pid_t pid) ...@@ -1776,6 +1776,9 @@ int cr_dump_tasks(pid_t pid)
if (vdso_init()) if (vdso_init())
goto err; goto err;
if (parse_cg_info())
goto err;
if (write_img_inventory()) if (write_img_inventory())
goto err; goto err;
......
...@@ -8,4 +8,43 @@ int dump_cgroups(void); ...@@ -8,4 +8,43 @@ int dump_cgroups(void);
int prepare_task_cgroup(struct pstree_item *); int prepare_task_cgroup(struct pstree_item *);
int prepare_cgroup(void); int prepare_cgroup(void);
void fini_cgroup(void); void fini_cgroup(void);
struct cg_controller;
/* This describes a particular cgroup path, e.g. the '/lxc/u1' part of
 * 'blkio/lxc/u1' and any properties it has.
 *
 * Directories form a tree: each node keeps its sub-directories on the
 * 'children' list and is itself linked into its parent's list through
 * 'siblings'.
 */
struct cgroup_dir {
/* path of this directory inside the hierarchy, e.g. "/lxc/u1" */
char *path;
/* this is how children are linked together */
struct list_head siblings;
/* more cgroup_dirs */
struct list_head children;
/* presumably the number of entries on 'children' -- confirm in cgroup.c */
unsigned int n_children;
/* presumably the controller this directory belongs to -- confirm in cgroup.c */
struct cg_controller *controller;
};
/* This describes a particular cgroup controller, e.g. blkio or cpuset.
 * The heads are subdirectories organized in their tree format.
 *
 * One cg_controller may carry several controller names: parse_cgroups()
 * appends every /proc/cgroups entry with the same hierarchy id to the same
 * structure.
 */
struct cg_controller {
/* hierarchy id as read from /proc/cgroups (identifier spelling kept as is) */
int heirarchy;
/* number of entries in 'controllers' */
unsigned int n_controllers;
/* names of the controllers sharing this hierarchy id */
char **controllers;
/* cgroup_dirs */
struct list_head heads;
/* presumably the number of entries on 'heads' -- confirm in cgroup.c */
unsigned int n_heads;
/* for cgroup list in cgroup.c */
struct list_head l;
};
struct cg_controller *new_controller(const char *name, int heirarchy);
/* parse all global cgroup information into structures */
int parse_cg_info(void);
#endif /* __CR_CGROUP_H__ */ #endif /* __CR_CGROUP_H__ */
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "asm/types.h" #include "asm/types.h"
#include "image.h" #include "image.h"
#include "list.h" #include "list.h"
#include "cgroup.h"
#include "protobuf/eventfd.pb-c.h" #include "protobuf/eventfd.pb-c.h"
#include "protobuf/eventpoll.pb-c.h" #include "protobuf/eventpoll.pb-c.h"
...@@ -203,4 +204,6 @@ struct cg_ctl { ...@@ -203,4 +204,6 @@ struct cg_ctl {
extern int parse_task_cgroup(int pid, struct list_head *l, unsigned int *n); extern int parse_task_cgroup(int pid, struct list_head *l, unsigned int *n);
extern void put_ctls(struct list_head *); extern void put_ctls(struct list_head *);
int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups);
#endif /* __CR_PROC_PARSE_H__ */ #endif /* __CR_PROC_PARSE_H__ */
...@@ -288,7 +288,7 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn); ...@@ -288,7 +288,7 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn);
/* /*
* Check whether @str starts with @sub * Check whether @str starts with @sub
*/ */
static inline bool strstartswith(char *str, char *sub) static inline bool strstartswith(const char *str, const char *sub)
{ {
while (1) { while (1) {
if (*sub == '\0') /* end of sub -- match */ if (*sub == '\0') /* end of sub -- match */
...@@ -303,4 +303,16 @@ static inline bool strstartswith(char *str, char *sub) ...@@ -303,4 +303,16 @@ static inline bool strstartswith(char *str, char *sub)
} }
} }
/*
* mkdir -p
*/
int mkdirp(const char *path);
/*
* Tests whether @prefix is a path prefix of @path. This is different from
* strstartswith because "/foo" is _not_ a path prefix of "/foobar", since they
* refer to different directories.
*/
bool is_path_prefix(const char *path, const char *prefix);
FILE *fopenat(int dirfd, char *path, char *cflags);
#endif /* __CR_UTIL_H__ */ #endif /* __CR_UTIL_H__ */
...@@ -861,6 +861,9 @@ static struct fstype fstypes[] = { ...@@ -861,6 +861,9 @@ static struct fstype fstypes[] = {
}, { }, {
.name = "debugfs", .name = "debugfs",
.code = FSTYPE__DEBUGFS, .code = FSTYPE__DEBUGFS,
}, {
.name = "cgroup",
.code = FSTYPE__CGROUP,
} }
}; };
......
...@@ -1547,7 +1547,7 @@ int parse_task_cgroup(int pid, struct list_head *retl, unsigned int *n) ...@@ -1547,7 +1547,7 @@ int parse_task_cgroup(int pid, struct list_head *retl, unsigned int *n)
} }
list_for_each_entry(cc, retl, l) list_for_each_entry(cc, retl, l)
if (strcmp(cc->name, name) >= 0) if (strcmp(cc->name, name) >= 0 && strcmp(cc->path, path) >= 0)
break; break;
list_add_tail(&ncc->l, &cc->l); list_add_tail(&ncc->l, &cc->l);
...@@ -1573,3 +1573,76 @@ void put_ctls(struct list_head *l) ...@@ -1573,3 +1573,76 @@ void put_ctls(struct list_head *l)
xfree(c); xfree(c);
} }
} }
/* Parse and create all the real controllers. This does not include things with
 * the "name=" prefix, e.g. systemd.
 *
 * Reads /proc/cgroups line by line (the first line is a header and is
 * dropped). Each remaining line starts with "<controller-name> <hierarchy-id>".
 * Controllers that share a hierarchy id are collected into the same
 * struct cg_controller; an id that has not been seen yet gets a fresh
 * structure from new_controller(), which is appended to @cgroups.
 *
 * @cgroups:   list head the cg_controller structures are linked into
 * @n_cgroups: incremented once for every cg_controller added to @cgroups
 *
 * Returns 0 on success and -1 on any failure (open, short file, malformed
 * line, allocation). On failure, entries added so far are left on @cgroups.
 *
 * NOTE(review): cgroups(7) describes hierarchy id 0 as "not attached to a v1
 * hierarchy"; such controllers are still grouped together here -- confirm
 * that this is what the callers expect.
 */
int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups)
{
	FILE *f;
	char buf[1024], name[1024];
	int heirarchy, ret = -1;
	struct cg_controller *cur;

	f = fopen("/proc/cgroups", "r");
	if (!f) {
		pr_perror("failed opening /proc/cgroups");
		return -1;
	}

	/* throw away the header */
	if (!fgets(buf, sizeof(buf), f))
		goto out;

	while (fgets(buf, sizeof(buf), f)) {
		struct cg_controller *match = NULL;

		/*
		 * Refuse lines we cannot parse instead of silently reusing
		 * the name/id left over from the previous iteration. The
		 * field width keeps the write inside name[].
		 */
		if (sscanf(buf, "%1023s %d", name, &heirarchy) != 2) {
			pr_perror("failed parsing /proc/cgroups");
			goto out;
		}

		list_for_each_entry(cur, cgroups, l) {
			if (cur->heirarchy == heirarchy) {
				match = cur;
				break;
			}
		}

		if (match) {
			void *m;
			char *n;

			/*
			 * Grow the array first and bump the counter only once
			 * the new slot is really filled, so that a failure
			 * never leaves n_controllers covering an unset entry.
			 */
			m = xrealloc(match->controllers,
				     sizeof(char *) * (match->n_controllers + 1));
			if (!m)
				goto out;
			match->controllers = m;

			n = xstrdup(name);
			if (!n)
				goto out;

			match->controllers[match->n_controllers++] = n;
		} else {
			struct cg_controller *nc = new_controller(name, heirarchy);

			if (!nc)
				goto out;

			/* append at the tail of the list */
			list_add_tail(&nc->l, cgroups);
			(*n_cgroups)++;
		}
	}

	ret = 0;
out:
	fclose(f);
	return ret;
}
// One cgroup directory. The message is recursive: sub-directories are
// stored as nested cgroup_dir_entry messages in 'children'.
// Only field numbers 1 and 4 are used by this message.
message cgroup_dir_entry {
// path of this cgroup directory
required string path = 1;
// nested sub-directories
repeated cgroup_dir_entry children = 4;
}
// One saved controller set together with the directory trees found under it.
message cg_controller_entry {
// presumably the hierarchy id of the controller set -- confirm against the dump code
required uint32 id = 1;
// names of the controllers in this set
repeated string controllers = 2;
// top-level directories, each carrying its own children
repeated cgroup_dir_entry dirs = 3;
}
message cg_member_entry { message cg_member_entry {
required string name = 1; required string name = 1;
required string path = 2; required string path = 2;
...@@ -9,5 +20,6 @@ message cg_set_entry { ...@@ -9,5 +20,6 @@ message cg_set_entry {
} }
message cgroup_entry { message cgroup_entry {
repeated cg_set_entry sets = 1; repeated cg_set_entry sets = 1;
repeated cg_controller_entry controllers = 2;
} }
...@@ -11,6 +11,7 @@ enum fstype { ...@@ -11,6 +11,7 @@ enum fstype {
SECURITYFS = 9; SECURITYFS = 9;
FUSECTL = 10; FUSECTL = 10;
DEBUGFS = 11; DEBUGFS = 11;
CGROUP = 12;
}; };
message mnt_entry { message mnt_entry {
......
...@@ -172,6 +172,7 @@ ns/static/tun ...@@ -172,6 +172,7 @@ ns/static/tun
static/netns-nf static/netns-nf
static/netns static/netns
static/cgroup00 static/cgroup00
static/cgroup01
ns/static/clean_mntns ns/static/clean_mntns
" "
...@@ -203,6 +204,7 @@ bind-mount ...@@ -203,6 +204,7 @@ bind-mount
mountpoints mountpoints
inotify_irmap inotify_irmap
cgroup00 cgroup00
cgroup01
clean_mntns clean_mntns
deleted_dev deleted_dev
" "
......
...@@ -163,6 +163,7 @@ TST_DIR = \ ...@@ -163,6 +163,7 @@ TST_DIR = \
bind-mount \ bind-mount \
cgroup00 \ cgroup00 \
rmdir_open \ rmdir_open \
cgroup01 \
TST_DIR_FILE = \ TST_DIR_FILE = \
chroot \ chroot \
......
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include "zdtmtst.h"
const char *test_doc = "Check that empty cgroups are preserved";
const char *test_author = "Tycho Andersen <tycho.andersen@canonical.com>";
char *dirname;
TEST_OPTION(dirname, string, "cgroup directory name", 1);
static const char *cgname = "zdtmtst";
static const char *subname = "subcg";
static const char *empty = "empty";
/*
 * Test flow:
 *  1. mount a named cgroup hierarchy ("name=zdtmtst") on @dirname;
 *  2. create <dirname>/subcg and move this process into it;
 *  3. create <dirname>/subcg/empty, which has no tasks in it;
 *  4. wait for the checkpoint/restore cycle (test_daemon/test_waitsig);
 *  5. find the hierarchy's mount point in /proc/self/mountinfo and check
 *     that <mountpoint>/subcg/empty is still a directory.
 *
 * Returns 0 when the check passes and a non-zero value otherwise.
 */
int main(int argc, char **argv)
{
	int cgfd, l, ret = 1;
	char aux[1024], paux[1024];
	FILE *cgf;
	struct stat st;

	test_init(argc, argv);

	if (mkdir(dirname, 0700) < 0) {
		err("Can't make dir");
		goto out;
	}

	snprintf(aux, sizeof(aux), "none,name=%s", cgname);
	if (mount("none", dirname, "cgroup", 0, aux)) {
		err("Can't mount cgroups");
		goto out_rd;
	}

	/*
	 * An already existing directory is accepted, as it was when the
	 * result of mkdir() was not looked at; any other error is reported
	 * right here instead of surfacing later as an unrelated failure.
	 */
	snprintf(paux, sizeof(paux), "%s/%s", dirname, subname);
	if (mkdir(paux, 0600) < 0 && errno != EEXIST) {
		err("Can't make subcg dir");
		goto out_rs;
	}

	l = snprintf(aux, sizeof(aux), "%d", getpid());
	snprintf(paux, sizeof(paux), "%s/%s/tasks", dirname, subname);

	cgfd = open(paux, O_WRONLY);
	if (cgfd < 0) {
		err("Can't open tasks");
		goto out_rs;
	}

	l = write(cgfd, aux, l);
	close(cgfd);

	if (l < 0) {
		err("Can't move self to subcg");
		goto out_rs;
	}

	/* the cgroup under test: nothing is ever moved into it */
	snprintf(paux, sizeof(paux), "%s/%s/%s", dirname, subname, empty);
	if (mkdir(paux, 0600) < 0 && errno != EEXIST) {
		err("Can't make empty cgroup dir");
		goto out_rs;
	}

	test_daemon();
	test_waitsig();

	cgf = fopen("/proc/self/mountinfo", "r");
	if (cgf == NULL) {
		fail("No mountinfo file");
		goto out_rs;
	}

	while (fgets(paux, sizeof(paux), cgf)) {
		char *s;

		s = strstr(paux, cgname);
		if (s) {
			/* the fifth mountinfo field is the mount point */
			if (sscanf(paux, "%*d %*d %*d:%*d %*s %1023s", aux) != 1) {
				fail("can't parse mountinfo line\n");
				ret = -1;
				goto out_close;
			}
			test_msg("found cgroup at %s\n", aux);

			/* paux (the line buffer) is reused; the loop is left below */
			snprintf(paux, sizeof(paux), "%s/%s/%s", aux, subname, empty);

			if (stat(paux, &st)) {
				fail("couldn't stat %s\n", paux);
				ret = -1;
				goto out_close;
			}

			if (!S_ISDIR(st.st_mode)) {
				fail("%s is not a directory\n", paux);
				ret = -1;
				goto out_close;
			}

			pass();
			ret = 0;
			goto out_close;
		}
	}

	fail("empty cgroup not found!\n");

out_close:
	fclose(cgf);

	snprintf(paux, sizeof(paux), "%s/%s/%s", dirname, subname, empty);
	rmdir(paux);
out_rs:
	snprintf(paux, sizeof(paux), "%s/%s", dirname, subname);
	rmdir(paux);
	umount(dirname);
out_rd:
	rmdir(dirname);
out:
	return ret;
}
...@@ -678,3 +678,80 @@ struct vma_area *alloc_vma_area(void) ...@@ -678,3 +678,80 @@ struct vma_area *alloc_vma_area(void)
return p; return p;
} }
/*
 * mkdir -p: create @path together with every missing leading component.
 *
 * The path is copied into a local buffer and cut at each '/' in turn, so
 * mkdir() is called on "a", then "a/b", then "a/b/c", and so on. Components
 * that already exist (EEXIST) are not treated as errors. New directories
 * are created with mode 0755.
 *
 * Returns 0 on success (also for "" and "/", where there is nothing to
 * create) and -1 if @path does not fit into PATH_MAX or mkdir() fails.
 */
int mkdirp(const char *path)
{
	size_t i, len;
	char made_path[PATH_MAX], *pos;

	len = strlen(path);
	if (len >= PATH_MAX) {
		pr_err("path %s is longer than PATH_MAX\n", path);
		return -1;
	}

	strcpy(made_path, path);

	/* skip the leading '/' so that we never call mkdir("") */
	i = 0;
	if (made_path[0] == '/')
		i++;

	/*
	 * made_path is always restored before the condition is evaluated
	 * again, so its length stays equal to len.
	 */
	for (; i < len; i++) {
		pos = strchr(made_path + i, '/');
		if (pos)
			*pos = '\0';

		if (mkdir(made_path, 0755) < 0 && errno != EEXIST) {
			pr_perror("couldn't mkdirp directory %s", made_path);
			return -1;
		}

		/* no more separators: the full path has just been created */
		if (!pos)
			break;

		*pos = '/';
		i = pos - made_path;
	}

	return 0;
}
/*
 * Tell whether @prefix is a leading run of whole path components of @path.
 *
 * @path has to start with @prefix textually, and the character of @path that
 * follows the matched part has to be either the terminating NUL or a '/'.
 * So "/foo" is a prefix of "/foo" and "/foo/bar" but not of "/foobar".
 *
 * A @prefix that itself ends in '/' (the root "/" included) therefore matches
 * only when @path is identical to it or continues with another '/'.
 */
bool is_path_prefix(const char *path, const char *prefix)
{
	char next;

	if (!strstartswith(path, prefix))
		return false;

	/* first character of @path past the textual match */
	next = path[strlen(prefix)];

	return next == '\0' || next == '/';
}
/*
 * fopen(3) counterpart that resolves @path relative to @dirfd, the way
 * openat(2) does.
 *
 * @cflags is an fopen-style mode string. It is translated to open flags
 * following the table in fopen(3):
 *
 *   "r"  O_RDONLY                      "r+"  O_RDWR
 *   "w"  O_WRONLY|O_CREAT|O_TRUNC      "w+"  O_RDWR|O_CREAT|O_TRUNC
 *   "a"  O_WRONLY|O_CREAT|O_APPEND     "a+"  O_RDWR|O_CREAT|O_APPEND
 *
 * Other characters in the mode string are ignored here and handed to
 * fdopen() unchanged. Files created by this call get mode 0600.
 *
 * Returns the new stream, or NULL if either openat() or fdopen() fails; in
 * the latter case the freshly opened descriptor is closed again.
 */
FILE *fopenat(int dirfd, char *path, char *cflags)
{
	int fd, accmode = O_RDONLY, flags = 0;
	char *iter;
	FILE *f;

	for (iter = cflags; *iter; iter++) {
		switch (*iter) {
		case 'r':
			accmode = O_RDONLY;
			break;
		case 'w':
			accmode = O_WRONLY;
			flags |= O_CREAT | O_TRUNC;
			break;
		case 'a':
			accmode = O_WRONLY;
			flags |= O_CREAT | O_APPEND;
			break;
		case '+':
			/* upgrade the access mode only, keep O_CREAT/O_APPEND/O_TRUNC */
			accmode = O_RDWR;
			break;
		default:
			break;
		}
	}

	fd = openat(dirfd, path, flags | accmode, S_IRUSR | S_IWUSR);
	if (fd < 0)
		return NULL;

	f = fdopen(fd, cflags);
	if (!f) {
		/* fdopen() does not take ownership on failure */
		int saved_errno = errno;

		close(fd);
		errno = saved_errno;
	}

	return f;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment