Commit 2c623a56 authored by Pavel Emelyanov

mount: Move fstypes related code into separate files

After this the mount.c is no longer the fattest .c in the tree:

  - before
    3947 criu/mount.c
    3179 criu/cr-restore.c
    2622 criu/proc_parse.c
    2095 criu/tty.c

  - after
    3179 criu/cr-restore.c
    3170 criu/mount.c
    2623 criu/proc_parse.c
    2095 criu/tty.c

One thing that is not a plain move: since the fstypes array is static
to filesystems.c and mount.c needs to access fstypes[1] for auto
mounts, the fstype_auto() helper is added.
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent f0ea03bd
......@@ -35,6 +35,7 @@ obj-y += log.o
obj-y += lsm.o
obj-y += mem.o
obj-y += mount.o
obj-y += filesystems.o
obj-y += namespaces.o
obj-y += netfilter.o
obj-y += net.o
......
......@@ -28,6 +28,7 @@
#include "page-xfer.h"
#include "net.h"
#include "mount.h"
#include "filesystems.h"
#include "cgroup.h"
#include "cgroup-props.h"
#include "action-scripts.h"
......
......@@ -37,6 +37,7 @@
#include "cr-service.h"
#include "plugin.h"
#include "mount.h"
#include "filesystems.h"
#include "namespaces.h"
#include "cgroup.h"
#include "cgroup-props.h"
......
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mount.h>
#include "asm/types.h"
#include "compiler.h"
#include "xmalloc.h"
#include "cr_options.h"
#include "filesystems.h"
#include "namespaces.h"
#include "mount.h"
#include "pstree.h"
#include "kerndat.h"
#include "protobuf.h"
#include "autofs.h"
#include "util.h"
#include "fs-magic.h"
#include "images/mnt.pb-c.h"
#include "images/binfmt-misc.pb-c.h"
/*
 * Append @opt to the option string of @pm.
 *
 * A separating comma is emitted only when the option string already has
 * content.  Returns 0 on success and -1 when xstrcat() returns NULL.
 */
static int attach_option(struct mount_info *pm, char *opt)
{
	char *joined;

	if (pm->options[0] != '\0')
		joined = xstrcat(pm->options, ",%s", opt);
	else
		joined = xstrcat(pm->options, "%s", opt);

	pm->options = joined;
	if (!joined)
		return -1;

	return 0;
}
/*
 * One binfmt_misc entry read from the image; instances are linked into
 * binfmt_misc_list by collect_one_binfmt_misc_entry().
 */
struct binfmt_misc_info {
BinfmtMiscEntry *bme;
struct list_head list;
};
/* Entries collected from the CR_FD_BINFMT_MISC image, in collection order. */
LIST_HEAD(binfmt_misc_list);
/*
 * Parse hook for binfmt_misc mounts: sets opts.has_binfmt_misc when the
 * mount's namespace id is of type NS_ROOT.  Always returns 0.
 */
static int binfmt_misc_parse(struct mount_info *pm)
{
	bool is_ns_root = (pm->nsid->type == NS_ROOT);

	if (is_ns_root)
		opts.has_binfmt_misc = true;

	return 0;
}
/*
 * Ask kerndat whether the binfmt_misc instance behind @pm->s_dev is
 * virtualized; the kerndat_fs_virtualized() result is returned unchanged.
 */
static int binfmt_misc_virtual(struct mount_info *pm)
{
	int virt;

	virt = kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
	return virt;
}
/*
 * Parse one binfmt_misc entry file, line by line, into @bme.
 *
 * Recognised lines start with "enabled", "disabled", "offset ",
 * "interpreter ", "flags: ", "extension .", "magic " or "mask ".  String
 * values are duplicated with xstrdup() and stored in @bme; they remain
 * there even when the function fails part-way through.
 *
 * Returns 0 once breadline() returns NULL, and -1 on a read error, an
 * unparsable offset, an allocation failure or an unrecognised line.
 */
static int parse_binfmt_misc_entry(struct bfd *f, BinfmtMiscEntry *bme)
{
	while (1) {
		char *str;

		str = breadline(f);
		if (IS_ERR(str))
			return -1;
		if (!str)
			break;

		if (!strncmp(str, "enabled", 7)) {
			bme->enabled = true;
			continue;
		}

		/* "disabled" is the default state, nothing to record */
		if (!strncmp(str, "disabled", 8))
			continue;

		if (!strncmp(str, "offset ", 7)) {
			if (sscanf(str + 7, "%i", &bme->offset) != 1)
				return -1;
			bme->has_offset = true;
			continue;
		}

/* Copy the remainder of the line into bme->member when it starts with key */
#define DUP_EQUAL_AS(key, member)					\
		if (!strncmp(str, key, strlen(key))) {			\
			bme->member = xstrdup(str + strlen(key));	\
			if (!bme->member)				\
				return -1;				\
			continue;					\
		}
		DUP_EQUAL_AS("interpreter ", interpreter)
		DUP_EQUAL_AS("flags: ", flags)
		DUP_EQUAL_AS("extension .", extension)
		DUP_EQUAL_AS("magic ", magic)
		DUP_EQUAL_AS("mask ", mask)
#undef DUP_EQUAL_AS

		/*
		 * This is a format problem, not a failed system call, so
		 * errno carries nothing useful: report with pr_err().
		 */
		pr_err("binfmt_misc: unsupported feature %s\n", str);
		return -1;
	}

	return 0;
}
/*
 * Dump a single binfmt_misc entry.
 *
 * Opens @name relative to the directory descriptor @dfd, parses it with
 * parse_binfmt_misc_entry() and writes the result to @img as a
 * PB_BINFMT_MISC record.  Returns 0 on success, -1 otherwise.
 */
static int dump_binfmt_misc_entry(int dfd, char *name, struct cr_img *img)
{
	BinfmtMiscEntry bme = BINFMT_MISC_ENTRY__INIT;
	struct bfd f;
	int ret = -1;

	f.fd = openat(dfd, name, O_RDONLY);
	if (f.fd < 0) {
		pr_perror("binfmt_misc: can't open %s", name);
		return -1;
	}

	/*
	 * NOTE(review): f.fd is not closed on this path; presumably
	 * bfdopenr() releases it on failure -- confirm.
	 */
	if (bfdopenr(&f))
		return -1;

	if (!parse_binfmt_misc_entry(&f, &bme)) {
		/* name is borrowed from the caller, it is not freed below */
		bme.name = name;
		if (!pb_write_one(img, &bme, PB_BINFMT_MISC))
			ret = 0;
	}

	/* Strings duplicated by the parser are released on every path */
	free(bme.interpreter);
	free(bme.flags);
	free(bme.extension);
	free(bme.magic);
	free(bme.mask);
	bclose(&f);

	return ret;
}
/*
 * Dump hook for binfmt_misc.
 *
 * Returns the binfmt_misc_virtual() result right away when it is <= 0.
 * Otherwise walks the mountpoint directory and dumps every entry except
 * "register" and "status".  Only one superblock may be dumped per run; a
 * second call that gets this far fails.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int binfmt_misc_dump(struct mount_info *pm)
{
	static bool dumped = false;
	struct cr_img *img = NULL;
	struct dirent *de;
	DIR *fdir;
	int fd, ret;

	ret = binfmt_misc_virtual(pm);
	if (ret <= 0)
		return ret;

	if (dumped) {
		pr_err("Second binfmt_misc superblock\n");
		return -1;
	}
	dumped = true;

	fd = open_mountpoint(pm);
	if (fd < 0)
		return fd;

	fdir = fdopendir(fd);
	if (!fdir) {
		close(fd);
		return -1;
	}

	ret = -1;
	for (de = readdir(fdir); de; de = readdir(fdir)) {
		if (dir_dots(de))
			continue;
		if (!strcmp(de->d_name, "register") ||
		    !strcmp(de->d_name, "status"))
			continue;

		if (!img) {
			/* Create image only if an entry exists, i.e. here */
			img = open_image(CR_FD_BINFMT_MISC, O_DUMP);
			if (!img)
				goto out;
		}

		if (dump_binfmt_misc_entry(fd, de->d_name, img))
			goto out;
	}
	ret = 0;

out:
	if (img)
		close_image(img);
	/* fd was handed over to fdir by fdopendir(); closedir() releases it */
	closedir(fdir);
	return ret;
}
/*
 * Register one binfmt_misc entry under the mount at @mp.
 *
 * @buf is the registration pattern; it is written to "<mp>/register".
 * When @bme->enabled is false, "0" is then written to "<mp>/<name>".
 *
 * Returns 0 on success and -1 on any open or write failure.
 */
static int write_binfmt_misc_entry(char *mp, char *buf, BinfmtMiscEntry *bme)
{
	int fd, len, ret = -1;
	char path[PATH_MAX+1];

	snprintf(path, PATH_MAX, "%s/register", mp);
	fd = open(path, O_WRONLY);
	if (fd < 0) {
		pr_perror("binfmt_misc: can't open %s", path);
		return -1;
	}

	len = strlen(buf);
	if (write(fd, buf, len) != len) {
		pr_perror("binfmt_misc: can't write to %s", path);
		goto close;
	}

	if (!bme->enabled) {
		close(fd);

		snprintf(path, PATH_MAX, "%s/%s", mp, bme->name);
		fd = open(path, O_WRONLY);
		/*
		 * open() reports failure with -1 and 0 is a valid descriptor,
		 * so the test has to be "< 0", not "!fd".
		 */
		if (fd < 0) {
			pr_perror("binfmt_misc: can't open %s", path);
			goto out;
		}

		if (write(fd, "0", 1) != 1) {
			pr_perror("binfmt_misc: can't write to %s", path);
			goto close;
		}
	}

	ret = 0;
close:
	close(fd);
out:
	return ret;
}
#define BINFMT_MISC_STR (1920 + 1)

/*
 * Build the registration pattern for a magic-type entry into @buf.
 *
 * @buf must provide BINFMT_MISC_STR bytes.  Returns 1 on success and -1
 * when the entry cannot be encoded: offset outside 0..128, a magic or mask
 * hex string with an odd number of characters, or a pattern that would not
 * fit into @buf.
 */
static int make_bfmtm_magic_str(char *buf, BinfmtMiscEntry *bme)
{
	size_t i, len, magic_len, mask_len = 0, offset_len;

	/*
	 * Format is ":name:type(M):offset:magic:mask:interpreter:flags".
	 * Magic and mask are special fields. Kernel outputs them as
	 * a sequence of hexadecimal numbers (abc -> 616263), and we
	 * dump them without changes. But for registering a new entry
	 * it expects every byte is prepended with \x, i.e. \x61\x62\x63.
	 */

	/* The unsigned compare rejects negative offsets as well as > 128 */
	if ((unsigned int)bme->offset > 128)
		return -1;
	offset_len = bme->offset >= 100 ? 3 : (bme->offset >= 10 ? 2 : 1);

	magic_len = strlen(bme->magic);
	if (bme->mask)
		mask_len = strlen(bme->mask);

	/*
	 * Two hex digits are consumed per step below; an odd length would
	 * copy the terminating NUL into the pattern.
	 */
	if ((magic_len % 2) || (mask_len % 2))
		return -1;

	/*
	 * Exact length of the pattern: seven colons plus the 'M' type
	 * character, and every pair of hex digits grows to four characters.
	 */
	len = strlen(":M::::::") + strlen(bme->name) + offset_len
		+ 2 * magic_len + 2 * mask_len + strlen(bme->interpreter)
		+ (bme->flags ? strlen(bme->flags) : 0);
	if (len > BINFMT_MISC_STR - 1)
		return -1;

	buf += sprintf(buf, ":%s:M:%d:", bme->name, bme->offset);

	for (i = 0; i < magic_len; i += 2)
		buf += sprintf(buf, "\\x%c%c", bme->magic[i], bme->magic[i + 1]);

	buf += sprintf(buf, ":");

	for (i = 0; i < mask_len; i += 2)
		buf += sprintf(buf, "\\x%c%c", bme->mask[i], bme->mask[i + 1]);

	sprintf(buf, ":%s:%s", bme->interpreter, bme->flags ? : "\0");

	return 1;
}
/*
 * Restore one binfmt_misc entry on the mount @mi.
 *
 * Builds the registration pattern for @bme in @buf (BINFMT_MISC_STR bytes)
 * and passes it to write_binfmt_misc_entry().  An entry needs an
 * interpreter and either a magic or an extension.
 *
 * Returns 0 on success and a negative value on failure.
 */
static int binfmt_misc_restore_bme(struct mount_info *mi, BinfmtMiscEntry *bme, char *buf)
{
	int ret;

	/* :name:type:offset:magic/extension:mask:interpreter:flags */
	if ((!bme->magic && !bme->extension) || !bme->interpreter) {
		/* Not a syscall failure, so errno has nothing to add */
		pr_err("binfmt_misc: bad dump\n");
		return -1;
	}

	if (bme->magic) {
		ret = make_bfmtm_magic_str(buf, bme);
	} else {
		/* :name:E::extension::interpreter:flags */
		ret = snprintf(buf, BINFMT_MISC_STR, ":%s:E::%s::%s:%s",
			       bme->name, bme->extension, bme->interpreter,
			       bme->flags ? : "\0");
		/*
		 * snprintf() returns the length it wanted to write; a value
		 * of BINFMT_MISC_STR or more means the pattern was cut off
		 * and must not be registered.
		 */
		if (ret >= BINFMT_MISC_STR) {
			pr_err("binfmt_misc: entry %s is too long\n", bme->name);
			return -1;
		}
	}

	if (ret <= 0)
		return ret;

	pr_debug("binfmt_misc_pattern=%s\n", buf);
	return write_binfmt_misc_entry(mi->mountpoint, buf, bme);
}
/*
 * Restore hook for binfmt_misc.
 *
 * When binfmt_misc_list holds collected entries they are restored in list
 * order and the walk stops at the first failure.  Otherwise the entries are
 * read from the per-device CR_FD_BINFMT_MISC_OLD image; an empty image
 * counts as success.
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int binfmt_misc_restore(struct mount_info *mi)
{
	struct cr_img *img;
	char *buf;
	int ret = -1;

	buf = xmalloc(BINFMT_MISC_STR);
	if (!buf)
		return -1;

	if (!list_empty(&binfmt_misc_list)) {
		struct binfmt_misc_info *bmi;

		list_for_each_entry(bmi, &binfmt_misc_list, list) {
			ret = binfmt_misc_restore_bme(mi, bmi->bme, buf);
			if (ret)
				break;
		}
		goto free_buf;
	}

	img = open_image(CR_FD_BINFMT_MISC_OLD, O_RSTR, mi->s_dev);
	if (!img) {
		pr_err("Can't open binfmt_misc_old image\n");
		goto free_buf;
	}

	ret = 0;
	if (!empty_image(img)) {
		while (ret == 0) {
			BinfmtMiscEntry *bme;

			/* values <= 0 (end of image or error) end the loop */
			ret = pb_read_one_eof(img, &bme, PB_BINFMT_MISC);
			if (ret <= 0)
				break;

			ret = binfmt_misc_restore_bme(mi, bme, buf);
			binfmt_misc_entry__free_unpacked(bme, NULL);
		}
	}
	close_image(img);

free_buf:
	free(buf);
	return ret;
}
/*
 * collect_image() callback: stores the decoded message in the private
 * object @o and appends that object to the tail of binfmt_misc_list.
 * Always returns 0.
 */
static int collect_one_binfmt_misc_entry(void *o, ProtobufCMessage *msg, struct cr_img *img)
{
	struct binfmt_misc_info *info = o;
	BinfmtMiscEntry *entry = pb_msg(msg, BinfmtMiscEntry);

	info->bme = entry;
	list_add_tail(&info->list, &binfmt_misc_list);

	return 0;
}
/*
 * Collection descriptor for the CR_FD_BINFMT_MISC image: each
 * PB_BINFMT_MISC record gets a struct binfmt_misc_info sized private
 * object and is passed to collect_one_binfmt_misc_entry().
 */
struct collect_image_info binfmt_misc_cinfo = {
.fd_type = CR_FD_BINFMT_MISC,
.pb_type = PB_BINFMT_MISC,
.priv_size = sizeof(struct binfmt_misc_info),
.collect = collect_one_binfmt_misc_entry,
};
/*
 * Collect all binfmt_misc entries described by binfmt_misc_cinfo.
 * Returns whatever collect_image() returns.
 */
int collect_binfmt_misc(void)
{
	int ret;

	ret = collect_image(&binfmt_misc_cinfo);
	return ret;
}
/*
 * Dump the content of a tmpfs mount.
 *
 * Runs "tar --create" on the mountpoint, addressed through
 * /proc/self/fd/<fd>, with the raw descriptor of the CR_FD_TMPFS_DEV image
 * passed to cr_system_userns().  When the root task has its own user
 * namespace, its real pid is passed along as well.
 *
 * Returns 0 on success, a negative open_mountpoint() result when the
 * mountpoint cannot be opened, or a non-zero value on other failures.
 */
static int tmpfs_dump(struct mount_info *pm)
{
	int ret = -1, fd = -1, userns_pid = -1;
	char tmpfs_path[PSFDS];
	struct cr_img *img;
	int fd_flags;
	char *tar_argv[] = {
		"tar", "--create",
		"--gzip",
		"--no-unquote",
		"--no-wildcards",
		"--one-file-system",
		"--check-links",
		"--preserve-permissions",
		"--sparse",
		"--numeric-owner",
		"--directory", tmpfs_path, ".", NULL
	};

	fd = open_mountpoint(pm);
	if (fd < 0)
		return fd;

	/* if fd happens to be 0 here, we need to move it to something
	 * non-zero, because cr_system_userns closes STDIN_FILENO as we are not
	 * interested in passing stdin to tar.
	 */
	if (move_fd_from(&fd, STDIN_FILENO) < 0)
		goto out;

	/* tar has to inherit the descriptor, so close-on-exec is cleared */
	fd_flags = fcntl(fd, F_GETFD) & ~FD_CLOEXEC;
	if (fcntl(fd, F_SETFD, fd_flags) == -1) {
		pr_perror("Can not drop FD_CLOEXEC");
		goto out;
	}

	img = open_image(CR_FD_TMPFS_DEV, O_DUMP, pm->s_dev);
	if (!img)
		goto out;

	sprintf(tmpfs_path, "/proc/self/fd/%d", fd);

	if (root_ns_mask & CLONE_NEWUSER)
		userns_pid = root_item->pid.real;

	ret = cr_system_userns(-1, img_raw_fd(img), -1, "tar", tar_argv,
			       0, userns_pid);
	if (ret)
		pr_err("Can't dump tmpfs content\n");

	close_image(img);
out:
	close_safe(&fd);
	return ret;
}
/*
 * Restore the content of a tmpfs mount.
 *
 * The per-device CR_FD_TMPFS_DEV image is tried first; when it is empty,
 * the per-mount CR_FD_TMPFS_IMG image is used instead.  The chosen image is
 * passed to "tar --extract" running in the mountpoint directory.
 *
 * Returns 0 on success, -1 when no usable image exists or tar fails.
 */
static int tmpfs_restore(struct mount_info *pm)
{
	struct cr_img *img;
	int ret;
	char *tar_argv[] = {
		"tar", "--extract", "--gzip",
		"--no-unquote", "--no-wildcards",
		"--directory", pm->mountpoint, NULL
	};

	img = open_image(CR_FD_TMPFS_DEV, O_RSTR, pm->s_dev);
	if (empty_image(img)) {
		close_image(img);
		img = open_image(CR_FD_TMPFS_IMG, O_RSTR, pm->mnt_id);
	}
	if (!img)
		return -1;

	if (empty_image(img)) {
		close_image(img);
		return -1;
	}

	ret = cr_system(img_raw_fd(img), -1, -1, "tar", tar_argv, 0);
	close_image(img);

	if (!ret)
		return 0;

	pr_err("Can't restore tmpfs content\n");
	return -1;
}
/*
 * A devtmpfs that is virtualized on either side (dump or restore) is
 * handled as a plain tmpfs.
 *
 * One interesting case -- shared on dump but virtual on restore -- will
 * fail, because no tarball with the fs contents will be found.
 *
 * The kerndat_fs_virtualized() result for @pm->s_dev is returned unchanged.
 */
static int devtmpfs_virtual(struct mount_info *pm)
{
	int virt;

	virt = kerndat_fs_virtualized(KERNDAT_FS_STAT_DEVTMPFS, pm->s_dev);
	return virt;
}
/*
 * Dump hook for devtmpfs: when devtmpfs_virtual() returns 1 the mount is
 * dumped via tmpfs_dump(); any other result is returned as is.
 */
static int devtmpfs_dump(struct mount_info *pm)
{
	int virt = devtmpfs_virtual(pm);

	if (virt != 1)
		return virt;

	return tmpfs_dump(pm);
}
/*
 * Restore hook for devtmpfs: when devtmpfs_virtual() returns 1 the mount is
 * restored via tmpfs_restore(); any other result is returned as is.
 */
static int devtmpfs_restore(struct mount_info *pm)
{
	int virt = devtmpfs_virtual(pm);

	if (virt != 1)
		return virt;

	return tmpfs_restore(pm);
}
/*
 * Parse hook for devpts: find out whether it was mounted with or without
 * the newinstance option.
 *
 * A kerndat_fs_virtualized() result <= 0 is returned as is; otherwise the
 * attach_option() result is returned.
 */
static int devpts_parse(struct mount_info *pm)
{
	int virt = kerndat_fs_virtualized(KERNDAT_FS_STAT_DEVPTS, pm->s_dev);

	if (virt > 0) {
		/*
		 * Kernel hides this option, but if the fs instance
		 * is new (virtualized) we know that it was created
		 * with -o newinstance.
		 */
		return attach_option(pm, "newinstance");
	}

	return virt;
}
/*
 * Dump hook for fusectl.
 *
 * Every directory entry of the mountpoint (dots excluded) must start with
 * a number.  The dump fails when that number equals the device minor of a
 * FSTYPE__FUSE mount in mntinfo that is not marked external.
 *
 * Returns 0 when all entries pass, a negative value otherwise.
 */
static int fusectl_dump(struct mount_info *pm)
{
	struct dirent *de;
	DIR *fdir;
	int fd, ret = -1;

	fd = open_mountpoint(pm);
	if (fd < 0)
		return fd;

	fdir = fdopendir(fd);
	if (!fdir) {
		close(fd);
		return -1;
	}

	for (de = readdir(fdir); de; de = readdir(fdir)) {
		struct mount_info *it;
		int id;

		if (dir_dots(de))
			continue;

		if (sscanf(de->d_name, "%d", &id) != 1) {
			pr_err("wrong number of items scanned in fusectl dump\n");
			goto out;
		}

		for (it = mntinfo; it; it = it->next) {
			if (it->fstype->code != FSTYPE__FUSE)
				continue;
			if (id != kdev_minor(it->s_dev))
				continue;
			if (it->external)
				continue;

			pr_err("%s is a fuse mount but not external\n", it->mountpoint);
			goto out;
		}
	}
	ret = 0;

out:
	closedir(fdir);
	return ret;
}
/*
 * Parse hook for debugfs: forces MS_REC into the mount flags.
 * Always returns 0.
 */
static int debugfs_parse(struct mount_info *pm)
{
	/* tracefs is automounted underneath debugfs sometimes, and the
	 * kernel's overmounting protection prevents us from mounting debugfs
	 * first without tracefs, so let's always mount debugfs MS_REC.
	 */
	pm->flags = pm->flags | MS_REC;

	return 0;
}
/*
 * Parse hook for tracefs: unconditionally returns 1 and never touches @pm.
 * NOTE(review): presumably a positive parse result makes the caller skip
 * this mount -- confirm against the code that invokes ->parse().
 */
static int tracefs_parse(struct mount_info *pm)
{
	(void)pm;

	return 1;
}
/*
 * Parse hook for cgroup mounts.
 *
 * Does nothing unless CLONE_NEWCGROUP is set in root_ns_mask.  Otherwise
 * the original root string is moved to pm->private and pm->root is
 * replaced with a fresh copy of "/".
 *
 * Returns 0 on success, -1 when the copy cannot be allocated.
 */
static int cgroup_parse(struct mount_info *pm)
{
	char *fake_root;

	if ((root_ns_mask & CLONE_NEWCGROUP) == 0)
		return 0;

	/* cgroup namespaced mounts don't look rooted to CRIU, so let's fake it
	 * here.
	 */
	pm->private = pm->root;

	fake_root = xstrdup("/");
	pm->root = fake_root;

	return fake_root ? 0 : -1;
}
/*
 * Dump hook for filesystems that can only be dumped when they hold no
 * entries: succeeds only if is_empty_dir() returns a positive value for
 * the mountpoint.
 *
 * Returns 0 on success and a negative value otherwise.
 *
 * NOTE(review): the "isn't empty" message is printed when is_empty_dir()
 * returns a negative value, while a zero result fails silently; this looks
 * swapped if negative means "error" and zero means "not empty" -- confirm
 * against is_empty_dir().
 */
static int dump_empty_fs(struct mount_info *pm)
{
	int fd, empty;

	fd = open_mountpoint(pm);
	if (fd < 0)
		return fd;

	empty = is_empty_dir(fd);
	close(fd);

	if (empty < 0) {
		pr_err("%s isn't empty\n", pm->fstype->name);
		return -1;
	}

	if (!empty)
		return -1;

	return 0;
}
/*
 * Hook for filesystems (fuse) that cannot be dumped: used as both the dump
 * and the restore callback, it logs the mountpoint and fs name and fails.
 */
static int always_fail(struct mount_info *pm)
{
	const char *fsname = pm->fstype->name;

	pr_err("failed to dump fs %s (%s): always fail\n", pm->mountpoint,
	       fsname);

	return -1;
}
/*
 * Table of the filesystem types known to this file.
 *
 * The position of the first two entries matters: find_fstype_by_name() and
 * decode_fstype() return &fstypes[0] for anything unsupported, and
 * fstype_auto() / find_fstype_by_name() return &fstypes[1] for auto mounts.
 * Both lookup functions start scanning at index 1.
 */
static struct fstype fstypes[] = {
{
.name = "unsupported",
.code = FSTYPE__UNSUPPORTED,
}, {
.name = "auto_cr",
.code = FSTYPE__AUTO,
}, {
.name = "proc",
.code = FSTYPE__PROC,
}, {
.name = "sysfs",
.code = FSTYPE__SYSFS,
}, {
.name = "devtmpfs",
.code = FSTYPE__DEVTMPFS,
.dump = devtmpfs_dump,
.restore = devtmpfs_restore,
}, {
.name = "binfmt_misc",
.parse = binfmt_misc_parse,
.code = FSTYPE__BINFMT_MISC,
.dump = binfmt_misc_dump,
.restore = binfmt_misc_restore,
}, {
.name = "tmpfs",
.code = FSTYPE__TMPFS,
.dump = tmpfs_dump,
.restore = tmpfs_restore,
}, {
.name = "devpts",
.parse = devpts_parse,
.code = FSTYPE__DEVPTS,
}, {
.name = "simfs",
.code = FSTYPE__SIMFS,
}, {
/* found by name, but carries the "unsupported" code */
.name = "btrfs",
.code = FSTYPE__UNSUPPORTED,
}, {
.name = "pstore",
.dump = dump_empty_fs,
.code = FSTYPE__PSTORE,
}, {
.name = "mqueue",
.dump = dump_empty_fs,
.code = FSTYPE__MQUEUE,
}, {
.name = "securityfs",
.code = FSTYPE__SECURITYFS,
}, {
.name = "fusectl",
.dump = fusectl_dump,
.code = FSTYPE__FUSECTL,
}, {
.name = "debugfs",
.code = FSTYPE__DEBUGFS,
.parse = debugfs_parse,
}, {
.name = "tracefs",
.code = FSTYPE__TRACEFS,
.parse = tracefs_parse,
}, {
.name = "cgroup",
.code = FSTYPE__CGROUP,
.parse = cgroup_parse,
}, {
.name = "aufs",
.code = FSTYPE__AUFS,
.parse = aufs_parse,
}, {
.name = "fuse",
.code = FSTYPE__FUSE,
.dump = always_fail,
.restore = always_fail,
}, {
.name = "overlay",
.code = FSTYPE__OVERLAYFS,
.parse = overlayfs_parse,
}, {
.name = "autofs",
.code = FSTYPE__AUTOFS,
.parse = autofs_parse,
.dump = autofs_dump,
.mount = autofs_mount,
},
};
/*
 * Accessor for the "auto_cr" entry (fstypes[1]); needed because the
 * fstypes array is static to this file.
 */
struct fstype *fstype_auto(void)
{
	struct fstype *auto_fs = fstypes + 1;

	return auto_fs;
}
/*
 * Keyword meaning "every filesystem name is auto"; its address also serves
 * as the sentinel value of fsauto_names once that keyword has been seen.
 */
static char fsauto_all[] = "all";
/*
 * Comma-separated list of auto filesystem names built by add_fsname_auto():
 * NULL when nothing was added, fsauto_all when everything is auto.
 */
static char *fsauto_names;
/*
 * Check whether the comma-separated string @css has an item equal to @str.
 *
 * A match only counts when it is delimited on both sides by a comma or by
 * the start/end of @css.  An empty @str never matches.
 */
static bool css_contains(const char *css, const char *str)
{
	size_t n = strlen(str);
	const char *hit;

	if (n == 0)
		return false;

	hit = strstr(css, str);
	while (hit) {
		bool starts_item = (hit == css) || (hit[-1] == ',');
		bool ends_item = (hit[n] == '\0') || (hit[n] == ',');

		if (starts_item && ends_item)
			return true;

		/* keep searching past this occurrence */
		hit = strstr(hit + n, str);
	}

	return false;
}
/*
 * Tell whether @name was registered as an auto filesystem: false when no
 * names were added, true when "all" was added, otherwise true only if
 * @name is an item of the fsauto_names list.
 */
static bool fsname_is_auto(const char *name)
{
	if (fsauto_names == NULL)
		return false;

	return fsauto_names == fsauto_all || css_contains(fsauto_names, name);
}
/*
 * Register the comma-separated filesystem @names as auto.
 *
 * Once "all" has been registered, further calls are no-ops.  Otherwise
 * @names either switches the list to "all", becomes the list, or is
 * appended to the list after a comma.  The previous list is released.
 *
 * Returns true on success, false when the new list could not be allocated
 * (fsauto_names is NULL afterwards).
 */
bool add_fsname_auto(const char *names)
{
	char *prev = fsauto_names;

	if (prev == fsauto_all)
		return true;

	if (css_contains(names, fsauto_all)) {
		fsauto_names = fsauto_all;
	} else if (prev) {
		if (asprintf(&fsauto_names, "%s,%s", prev, names) < 0)
			fsauto_names = NULL;
	} else {
		fsauto_names = xstrdup(names);
	}

	xfree(prev);

	return fsauto_names != NULL;
}
/*
 * Look up a filesystem type by its name.
 *
 * This fn is required for two things.
 * 1st -- to check supported filesystems (as just mounting
 *        anything is wrong, almost every fs has its own features)
 * 2nd -- save some space in the image (since we scan all
 *        names anyway)
 *
 * Returns the matching entry, the "auto_cr" entry when @fst was registered
 * as auto, and the "unsupported" entry otherwise.
 */
struct fstype *find_fstype_by_name(char *fst)
{
	int i;

	/* index 0 is the "unsupported" placeholder, it is never matched */
	for (i = 1; i < ARRAY_SIZE(fstypes); i++) {
		if (strcmp(fstypes[i].name, fst) == 0)
			return &fstypes[i];
	}

	return fsname_is_auto(fst) ? &fstypes[1] : &fstypes[0];
}
/*
 * Map an fstype code to its table entry.
 *
 * FSTYPE__UNSUPPORTED and codes that are not found both yield the
 * "unsupported" entry (fstypes[0]).  The scan stops early at an entry
 * without a name.
 */
struct fstype *decode_fstype(u32 fst)
{
	struct fstype *unsupported = &fstypes[0];
	int i;

	if (fst == FSTYPE__UNSUPPORTED)
		return unsupported;

	for (i = 1; i < ARRAY_SIZE(fstypes) && fstypes[i].name; i++) {
		if (fstypes[i].code == fst)
			return &fstypes[i];
	}

	return unsupported;
}
......@@ -28,6 +28,7 @@
#include "fsnotify.h"
#include "fdinfo.h"
#include "mount.h"
#include "filesystems.h"
#include "image.h"
#include "util.h"
#include "files.h"
......
#ifndef __CR_FILESYSTEMS_H__
#define __CR_FILESYSTEMS_H__
/* Look up a filesystem type by name. */
extern struct fstype *find_fstype_by_name(char *fst);
/* Look up a filesystem type by its FSTYPE__* code. */
extern struct fstype *decode_fstype(u32 fst);
/* Register a comma-separated list of filesystem names as auto. */
extern bool add_fsname_auto(const char *names);
struct mount_info;
/* Signature of the per-filesystem mount callback stored in struct fstype. */
typedef int (*mount_fn_t)(struct mount_info *mi, const char *src, const
char *fstype, unsigned long mountflags);
/*
 * Description of one filesystem type: its name, its FSTYPE__* code and
 * optional per-filesystem callbacks.
 */
struct fstype {
char *name;
int code;
int (*dump)(struct mount_info *pm);
int (*restore)(struct mount_info *pm);
int (*parse)(struct mount_info *pm);
mount_fn_t mount;
};
/* Accessor for the "auto" filesystem type entry. */
extern struct fstype *fstype_auto(void);
/* callback for AUFS support */
extern int aufs_parse(struct mount_info *mi);
/* callback for OverlayFS support */
extern int overlayfs_parse(struct mount_info *mi);
/* FIXME -- remove */
extern struct list_head binfmt_misc_list;
#endif
......@@ -102,8 +102,7 @@ extern struct ns_id *lookup_nsid_by_mnt_id(int mnt_id);
extern int open_mount(unsigned int s_dev);
extern int __open_mountpoint(struct mount_info *pm, int mnt_fd);
extern struct fstype *find_fstype_by_name(char *fst);
extern bool add_fsname_auto(const char *names);
extern int open_mountpoint(struct mount_info *pm);
extern struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump);
extern int prepare_mnt_ns(void);
......@@ -130,29 +129,10 @@ extern int read_mnt_ns_img(void);
extern void cleanup_mnt_ns(void);
extern void clean_cr_time_mounts(void);
struct mount_info;
typedef int (*mount_fn_t)(struct mount_info *mi, const char *src, const
char *fstype, unsigned long mountflags);
struct fstype {
char *name;
int code;
int (*dump)(struct mount_info *pm);
int (*restore)(struct mount_info *pm);
int (*parse)(struct mount_info *pm);
mount_fn_t mount;
};
extern bool add_skip_mount(const char *mountpoint);
struct ns_id;
extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump);
/* callback for AUFS support */
extern int aufs_parse(struct mount_info *mi);
/* callback for OverlayFS support */
extern int overlayfs_parse(struct mount_info *mi);
extern int check_mnt_id(void);
#endif /* __CR_MOUNT_H__ */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
......@@ -19,21 +17,18 @@
#include "util-pie.h"
#include "log.h"
#include "plugin.h"
#include "filesystems.h"
#include "mount.h"
#include "pstree.h"
#include "image.h"
#include "namespaces.h"
#include "protobuf.h"
#include "kerndat.h"
#include "fs-magic.h"
#include "sysfs_parse.h"
#include "path.h"
#include "autofs.h"
#include "files-reg.h"
#include "external.h"
#include "images/mnt.pb-c.h"
#include "images/binfmt-misc.pb-c.h"
/*
* Put a : in here since those are invalid on
......@@ -49,14 +44,6 @@
#define BINFMT_MISC_HOME "/proc/sys/fs/binfmt_misc"
#define CRTIME_MNT_ID 0
struct binfmt_misc_info {
BinfmtMiscEntry *bme;
struct list_head list;
};
static LIST_HEAD(binfmt_misc_list);
static struct fstype fstypes[];
int ext_mount_add(char *key, char *val)
{
struct ext_mount *em;
......@@ -120,8 +107,6 @@ static void mntinfo_add_list(struct mount_info *new)
}
}
static int open_mountpoint(struct mount_info *pm);
static struct mount_info *mnt_build_tree(struct mount_info *list, struct mount_info *roots_mp);
static int validate_mounts(struct mount_info *info, bool for_dump);
......@@ -527,7 +512,7 @@ static int try_resolve_ext_mount(struct mount_info *info)
return -1;
snprintf(source, len, "dev[%s]", val);
info->fstype = &fstypes[1];
info->fstype = fstype_auto();
BUG_ON(info->fstype->code != FSTYPE__AUTO);
xfree(info->source);
info->source = source;
......@@ -1086,7 +1071,7 @@ static char *get_clean_mnt(struct mount_info *mi, char *mnt_path_tmp, char *mnt_
}
#define MNT_UNREACHABLE INT_MIN
static int open_mountpoint(struct mount_info *pm)
int open_mountpoint(struct mount_info *pm)
{
struct mount_info *c;
int fd = -1, ns_old = -1;
......@@ -1159,15 +1144,6 @@ out:
return -1;
}
static int attach_option(struct mount_info *pm, char *opt)
{
if (pm->options[0] == '\0')
pm->options = xstrcat(pm->options, "%s", opt);
else
pm->options = xstrcat(pm->options, ",%s", opt);
return pm->options ? 0 : -1;
}
static int add_cr_time_mount(struct mount_info *root, char *fsname, const char *path, unsigned int s_dev)
{
struct mount_info *mi, *t, *parent;
......@@ -1215,152 +1191,6 @@ static int add_cr_time_mount(struct mount_info *root, char *fsname, const char *
return 0;
}
/* Is it mounted w or w/o the newinstance option */
static int devpts_parse(struct mount_info *pm)
{
int ret;
ret = kerndat_fs_virtualized(KERNDAT_FS_STAT_DEVPTS, pm->s_dev);
if (ret <= 0)
return ret;
/*
* Kernel hides this option, but if the fs instance
* is new (virtualized) we know that it was created
* with -o newinstance.
*/
return attach_option(pm, "newinstance");
}
static int tmpfs_dump(struct mount_info *pm)
{
int ret = -1, fd = -1, userns_pid = -1;
char tmpfs_path[PSFDS];
struct cr_img *img;
fd = open_mountpoint(pm);
if (fd < 0)
return fd;
/* if fd happens to be 0 here, we need to move it to something
* non-zero, because cr_system_userns closes STDIN_FILENO as we are not
* interested in passing stdin to tar.
*/
if (move_fd_from(&fd, STDIN_FILENO) < 0)
goto out;
if (fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) & ~FD_CLOEXEC) == -1) {
pr_perror("Can not drop FD_CLOEXEC");
goto out;
}
img = open_image(CR_FD_TMPFS_DEV, O_DUMP, pm->s_dev);
if (!img)
goto out;
sprintf(tmpfs_path, "/proc/self/fd/%d", fd);
if (root_ns_mask & CLONE_NEWUSER)
userns_pid = root_item->pid.real;
ret = cr_system_userns(-1, img_raw_fd(img), -1, "tar", (char *[])
{ "tar", "--create",
"--gzip",
"--no-unquote",
"--no-wildcards",
"--one-file-system",
"--check-links",
"--preserve-permissions",
"--sparse",
"--numeric-owner",
"--directory", tmpfs_path, ".", NULL }, 0, userns_pid);
if (ret)
pr_err("Can't dump tmpfs content\n");
close_image(img);
out:
close_safe(&fd);
return ret;
}
/*
* Virtualized devtmpfs on any side (dump or restore)
* means, that we should try to handle it as a plain
* tmpfs.
*
* Interesting case -- shared on dump and virtual on
* restore -- will fail, since no tarball with the fs
* contents will be found.
*/
static int devtmpfs_virtual(struct mount_info *pm)
{
return kerndat_fs_virtualized(KERNDAT_FS_STAT_DEVTMPFS, pm->s_dev);
}
static int devtmpfs_dump(struct mount_info *pm)
{
int ret;
ret = devtmpfs_virtual(pm);
if (ret == 1)
ret = tmpfs_dump(pm);
return ret;
}
static int tmpfs_restore(struct mount_info *pm)
{
int ret;
struct cr_img *img;
img = open_image(CR_FD_TMPFS_DEV, O_RSTR, pm->s_dev);
if (empty_image(img)) {
close_image(img);
img = open_image(CR_FD_TMPFS_IMG, O_RSTR, pm->mnt_id);
}
if (!img)
return -1;
if (empty_image(img)) {
close_image(img);
return -1;
}
ret = cr_system(img_raw_fd(img), -1, -1, "tar",
(char *[]) {"tar", "--extract", "--gzip",
"--no-unquote", "--no-wildcards",
"--directory", pm->mountpoint, NULL}, 0);
close_image(img);
if (ret) {
pr_err("Can't restore tmpfs content\n");
return -1;
}
return 0;
}
static int devtmpfs_restore(struct mount_info *pm)
{
int ret;
ret = devtmpfs_virtual(pm);
if (ret == 1)
ret = tmpfs_restore(pm);
return ret;
}
static int binfmt_misc_parse(struct mount_info *pm)
{
if (pm->nsid->type == NS_ROOT)
opts.has_binfmt_misc = true;
return 0;
}
/* Returns 1 in case of success, -errno in case of mount fail, and 0 on other errors */
static int mount_cr_time_mount(struct ns_id *ns, unsigned int *s_dev, const char *source,
const char *target, const char *type)
......@@ -1394,614 +1224,7 @@ out:
return ret < 0 ? 0 : exit_code;
}
static int binfmt_misc_virtual(struct mount_info *pm)
{
return kerndat_fs_virtualized(KERNDAT_FS_STAT_BINFMT_MISC, pm->s_dev);
}
static int parse_binfmt_misc_entry(struct bfd *f, BinfmtMiscEntry *bme)
{
while (1) {
char *str;
str = breadline(f);
if (IS_ERR(str))
return -1;
if (!str)
break;
if (!strncmp(str, "enabled", 7)) {
bme->enabled = true;
continue;
}
if (!strncmp(str, "disabled", 8))
continue;
if (!strncmp(str, "offset ", 7)) {
if (sscanf(str + 7, "%i", &bme->offset) != 1)
return -1;
bme->has_offset = true;
continue;
}
#define DUP_EQUAL_AS(key, member) \
if (!strncmp(str, key, strlen(key))) { \
bme->member = xstrdup(str + strlen(key)); \
if (!bme->member) \
return -1; \
continue; \
}
DUP_EQUAL_AS("interpreter ", interpreter)
DUP_EQUAL_AS("flags: ", flags)
DUP_EQUAL_AS("extension .", extension)
DUP_EQUAL_AS("magic ", magic)
DUP_EQUAL_AS("mask ", mask)
#undef DUP_EQUAL_AS
pr_perror("binfmt_misc: unsupported feature %s", str);
return -1;
}
return 0;
}
static int dump_binfmt_misc_entry(int dfd, char *name, struct cr_img *img)
{
BinfmtMiscEntry bme = BINFMT_MISC_ENTRY__INIT;
struct bfd f;
int ret = -1;
f.fd = openat(dfd, name, O_RDONLY);
if (f.fd < 0) {
pr_perror("binfmt_misc: can't open %s", name);
return -1;
}
if (bfdopenr(&f))
return -1;
if (parse_binfmt_misc_entry(&f, &bme))
goto err;
bme.name = name;
if (pb_write_one(img, &bme, PB_BINFMT_MISC))
goto err;
ret = 0;
err:
free(bme.interpreter);
free(bme.flags);
free(bme.extension);
free(bme.magic);
free(bme.mask);
bclose(&f);
return ret;
}
static int binfmt_misc_dump(struct mount_info *pm)
{
static bool dumped = false;
struct cr_img *img = NULL;
struct dirent *de;
DIR *fdir = NULL;
int fd, ret;
ret = binfmt_misc_virtual(pm);
if (ret <= 0)
return ret;
if (dumped) {
pr_err("Second binfmt_misc superblock\n");
return -1;
}
dumped = true;
fd = open_mountpoint(pm);
if (fd < 0)
return fd;
fdir = fdopendir(fd);
if (fdir == NULL) {
close(fd);
return -1;
}
ret = -1;
while ((de = readdir(fdir))) {
if (dir_dots(de))
continue;
if (!strcmp(de->d_name, "register"))
continue;
if (!strcmp(de->d_name, "status"))
continue;
if (!img) {
/* Create image only if an extry exists, i.e. here */
img = open_image(CR_FD_BINFMT_MISC, O_DUMP);
if (!img)
goto out;
}
if (dump_binfmt_misc_entry(fd, de->d_name, img))
goto out;
}
ret = 0;
out:
if (img)
close_image(img);
closedir(fdir);
return ret;
}
static int write_binfmt_misc_entry(char *mp, char *buf, BinfmtMiscEntry *bme)
{
int fd, len, ret = -1;
char path[PATH_MAX+1];
snprintf(path, PATH_MAX, "%s/register", mp);
fd = open(path, O_WRONLY);
if (fd < 0) {
pr_perror("binfmt_misc: can't open %s", path);
return -1;
}
len = strlen(buf);
if (write(fd, buf, len) != len) {
pr_perror("binfmt_misc: can't write to %s", path);
goto close;
}
if (!bme->enabled) {
close(fd);
snprintf(path, PATH_MAX, "%s/%s", mp, bme->name);
fd = open(path, O_WRONLY);
if (!fd) {
pr_perror("binfmt_misc: can't open %s", path);
goto out;
}
if (write(fd, "0", 1) != 1) {
pr_perror("binfmt_misc: can't write to %s", path);
goto close;
}
}
ret = 0;
close:
close(fd);
out:
return ret;
}
#define BINFMT_MISC_STR (1920 + 1)
static int make_bfmtm_magic_str(char *buf, BinfmtMiscEntry *bme)
{
int i, len;
/*
* Format is ":name:type(M):offset:magic:mask:interpreter:flags".
* Magic and mask are special fields. Kernel outputs them as
* a sequence of hexadecimal numbers (abc -> 616263), and we
* dump them without changes. But for registering a new entry
* it expects every byte is prepended with \x, i.e. \x61\x62\x63.
*/
len = strlen(bme->name) + 3 /* offset < 128 */ + 2 * strlen(bme->magic)
+ (bme->mask ? 2 * strlen(bme->mask) : 0) + strlen(bme->interpreter)
+ (bme->flags ? strlen(bme->flags) : 0) + strlen(":::::::");
if ((len > BINFMT_MISC_STR - 1) || bme->offset > 128)
return -1;
buf += sprintf(buf, ":%s:M:%d:", bme->name, bme->offset);
len = strlen(bme->magic);
for (i = 0; i < len; i += 2)
buf += sprintf(buf, "\\x%c%c", bme->magic[i], bme->magic[i + 1]);
buf += sprintf(buf, ":");
if (bme->mask) {
len = strlen(bme->mask);
for (i = 0; i < len; i += 2)
buf += sprintf(buf, "\\x%c%c", bme->mask[i], bme->mask[i + 1]);
}
sprintf(buf, ":%s:%s", bme->interpreter, bme->flags ? : "\0");
return 1;
}
static int binfmt_misc_restore_bme(struct mount_info *mi, BinfmtMiscEntry *bme, char *buf)
{
int ret;
/* :name:type:offset:magic/extension:mask:interpreter:flags */
if ((!bme->magic && !bme->extension) || !bme->interpreter) {
pr_perror("binfmt_misc: bad dump");
ret = -1;
} else if (bme->magic) {
ret = make_bfmtm_magic_str(buf, bme);
} else if (bme->extension) {
/* :name:E::extension::interpreter:flags */
ret = snprintf(buf, BINFMT_MISC_STR, ":%s:E::%s::%s:%s",
bme->name, bme->extension, bme->interpreter,
bme->flags ? : "\0");
}
if (ret > 0) {
pr_debug("binfmt_misc_pattern=%s\n", buf);
ret = write_binfmt_misc_entry(mi->mountpoint, buf, bme);
}
return ret;
}
static int binfmt_misc_restore(struct mount_info *mi)
{
struct cr_img *img;
char *buf;
int ret = -1;
buf = xmalloc(BINFMT_MISC_STR);
if (!buf)
return -1;
if (!list_empty(&binfmt_misc_list)) {
struct binfmt_misc_info *bmi;
list_for_each_entry(bmi, &binfmt_misc_list, list) {
ret = binfmt_misc_restore_bme(mi, bmi->bme, buf);
if (ret)
break;
}
goto free_buf;
}
img = open_image(CR_FD_BINFMT_MISC_OLD, O_RSTR, mi->s_dev);
if (!img) {
pr_err("Can't open binfmt_misc_old image\n");
goto free_buf;
} else if (empty_image(img)) {
close_image(img);
ret = 0;
goto free_buf;
}
ret = 0;
while (ret == 0) {
BinfmtMiscEntry *bme;
ret = pb_read_one_eof(img, &bme, PB_BINFMT_MISC);
if (ret <= 0)
break;
ret = binfmt_misc_restore_bme(mi, bme, buf);
binfmt_misc_entry__free_unpacked(bme, NULL);
}
close_image(img);
free_buf:
free(buf);
return ret;
}
static int collect_one_binfmt_misc_entry(void *o, ProtobufCMessage *msg, struct cr_img *img)
{
struct binfmt_misc_info *bmi = o;
bmi->bme = pb_msg(msg, BinfmtMiscEntry);
list_add_tail(&bmi->list, &binfmt_misc_list);
return 0;
}
struct collect_image_info binfmt_misc_cinfo = {
.fd_type = CR_FD_BINFMT_MISC,
.pb_type = PB_BINFMT_MISC,
.priv_size = sizeof(struct binfmt_misc_info),
.collect = collect_one_binfmt_misc_entry,
};
int collect_binfmt_misc(void)
{
return collect_image(&binfmt_misc_cinfo);
}
static int fusectl_dump(struct mount_info *pm)
{
int fd, ret = -1;
struct dirent *de;
DIR *fdir = NULL;
fd = open_mountpoint(pm);
if (fd < 0)
return fd;
fdir = fdopendir(fd);
if (fdir == NULL) {
close(fd);
return -1;
}
while ((de = readdir(fdir))) {
int id;
struct mount_info *it;
if (dir_dots(de))
continue;
if (sscanf(de->d_name, "%d", &id) != 1) {
pr_err("wrong number of items scanned in fusectl dump\n");
goto out;
}
for (it = mntinfo; it; it = it->next) {
if (it->fstype->code == FSTYPE__FUSE &&
id == kdev_minor(it->s_dev) && !it->external) {
pr_err("%s is a fuse mount but not external\n", it->mountpoint);
goto out;
}
}
}
ret = 0;
out:
closedir(fdir);
return ret;
}
static int debugfs_parse(struct mount_info *pm)
{
/* tracefs is automounted underneath debugfs sometimes, and the
* kernel's overmounting protection prevents us from mounting debugfs
* first without tracefs, so let's always mount debugfs MS_REC.
*/
pm->flags |= MS_REC;
return 0;
}
static int tracefs_parse(struct mount_info *pm)
{
return 1;
}
static int cgroup_parse(struct mount_info *pm)
{
if (!(root_ns_mask & CLONE_NEWCGROUP))
return 0;
/* cgroup namespaced mounts don't look rooted to CRIU, so let's fake it
* here.
*/
pm->private = pm->root;
pm->root = xstrdup("/");
if (!pm->root)
return -1;
return 0;
}
static int dump_empty_fs(struct mount_info *pm)
{
int fd, ret = -1;
fd = open_mountpoint(pm);
if (fd < 0)
return fd;
ret = is_empty_dir(fd);
close(fd);
if (ret < 0) {
pr_err("%s isn't empty\n", pm->fstype->name);
return -1;
}
return ret ? 0 : -1;
}
/*
* Some fses (fuse) cannot be dumped, so we should always fail on dump/restore
* of these fses.
*/
static int always_fail(struct mount_info *pm)
{
pr_err("failed to dump fs %s (%s): always fail\n", pm->mountpoint,
pm->fstype->name);
return -1;
}
static struct fstype fstypes[] = {
{
.name = "unsupported",
.code = FSTYPE__UNSUPPORTED,
}, {
.name = "auto_cr",
.code = FSTYPE__AUTO,
}, {
.name = "proc",
.code = FSTYPE__PROC,
}, {
.name = "sysfs",
.code = FSTYPE__SYSFS,
}, {
.name = "devtmpfs",
.code = FSTYPE__DEVTMPFS,
.dump = devtmpfs_dump,
.restore = devtmpfs_restore,
}, {
.name = "binfmt_misc",
.parse = binfmt_misc_parse,
.code = FSTYPE__BINFMT_MISC,
.dump = binfmt_misc_dump,
.restore = binfmt_misc_restore,
}, {
.name = "tmpfs",
.code = FSTYPE__TMPFS,
.dump = tmpfs_dump,
.restore = tmpfs_restore,
}, {
.name = "devpts",
.parse = devpts_parse,
.code = FSTYPE__DEVPTS,
}, {
.name = "simfs",
.code = FSTYPE__SIMFS,
}, {
.name = "btrfs",
.code = FSTYPE__UNSUPPORTED,
}, {
.name = "pstore",
.dump = dump_empty_fs,
.code = FSTYPE__PSTORE,
}, {
.name = "mqueue",
.dump = dump_empty_fs,
.code = FSTYPE__MQUEUE,
}, {
.name = "securityfs",
.code = FSTYPE__SECURITYFS,
}, {
.name = "fusectl",
.dump = fusectl_dump,
.code = FSTYPE__FUSECTL,
}, {
.name = "debugfs",
.code = FSTYPE__DEBUGFS,
.parse = debugfs_parse,
}, {
.name = "tracefs",
.code = FSTYPE__TRACEFS,
.parse = tracefs_parse,
}, {
.name = "cgroup",
.code = FSTYPE__CGROUP,
.parse = cgroup_parse,
}, {
.name = "aufs",
.code = FSTYPE__AUFS,
.parse = aufs_parse,
}, {
.name = "fuse",
.code = FSTYPE__FUSE,
.dump = always_fail,
.restore = always_fail,
}, {
.name = "overlay",
.code = FSTYPE__OVERLAYFS,
.parse = overlayfs_parse,
}, {
.name = "autofs",
.code = FSTYPE__AUTOFS,
.parse = autofs_parse,
.dump = autofs_dump,
.mount = autofs_mount,
},
};
static char fsauto_all[] = "all";
static char *fsauto_names;
static bool css_contains(const char *css, const char *str)
{
int len = strlen(str);
const char *cur;
if (!len)
return false;
for (cur = css; (cur = strstr(cur, str)); cur += len) {
if (cur > css && cur[-1] != ',')
continue;
if (cur[len] && cur[len] != ',')
continue;
return true;
}
return false;
}
static bool fsname_is_auto(const char *name)
{
if (!fsauto_names)
return false;
if (fsauto_names == fsauto_all)
return true;
return css_contains(fsauto_names, name);
}
bool add_fsname_auto(const char *names)
{
char *old = fsauto_names;
if (old == fsauto_all)
return true;
if (css_contains(names, fsauto_all))
fsauto_names = fsauto_all;
else if (!old)
fsauto_names = xstrdup(names);
else {
if (asprintf(&fsauto_names, "%s,%s", old, names) < 0)
fsauto_names = NULL;
}
xfree(old);
return fsauto_names != NULL;
}
struct fstype *find_fstype_by_name(char *fst)
{
int i;
/*
* This fn is required for two things.
* 1st -- to check supported filesystems (as just mounting
* anything is wrong, almost every fs has its own features)
* 2nd -- save some space in the image (since we scan all
* names anyway)
*/
for (i = 1; i < ARRAY_SIZE(fstypes); i++) {
struct fstype *fstype = fstypes + i;
if (!strcmp(fstype->name, fst))
return fstype;
}
if (fsname_is_auto(fst))
return &fstypes[1];
return &fstypes[0];
}
static struct fstype *decode_fstype(u32 fst)
{
int i;
if (fst == FSTYPE__UNSUPPORTED)
goto uns;
for (i = 1; i < ARRAY_SIZE(fstypes); i++) {
struct fstype *fstype = fstypes + i;
if (!fstype->name)
break;
if (fstype->code == fst)
return fstype;
}
uns:
return &fstypes[0];
}
static int dump_one_fs(struct mount_info *mi)
{
......
......@@ -14,6 +14,7 @@
#include "list.h"
#include "util.h"
#include "mount.h"
#include "filesystems.h"
#include "mman.h"
#include "cpu.h"
#include "file-lock.h"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment