Commit de8fd000, authored by Kirill Tkhai, committed by Pavel Emelyanov

fs: Add binfmt_misc support

This patch implements checkpoint/restore functionality
for binfmt_misc mounts. Both magic and extension types
and "disabled" state are supported.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 70b0e161
......@@ -79,6 +79,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
FD_ENTRY_F(IP6TABLES, "ip6tables-%d", O_NOBUF),
FD_ENTRY_F(TMPFS_IMG, "tmpfs-%d.tar.gz", O_NOBUF),
FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%d.tar.gz", O_NOBUF),
FD_ENTRY(BINFMT_MISC, "binfmt-misc-%d"),
FD_ENTRY(TTY_FILES, "tty"),
FD_ENTRY(TTY_INFO, "tty-info"),
FD_ENTRY(FILE_LOCKS, "filelocks"),
......
......@@ -82,6 +82,7 @@ enum {
CR_FD_TMPFS_IMG,
CR_FD_TMPFS_DEV,
CR_FD_BINFMT_MISC,
CR_FD_PAGES,
CR_FD_VMAS,
......
......@@ -90,6 +90,7 @@
#define CPUINFO_MAGIC 0x61404013 /* Nyandoma */
#define USERNS_MAGIC 0x55474906 /* Kazan */
#define SECCOMP_MAGIC 0x64413049 /* Kostomuksha */
#define BINFMT_MISC_MAGIC 0x67343323 /* Apatity */
#define IFADDR_MAGIC RAW_IMAGE_MAGIC
#define ROUTE_MAGIC RAW_IMAGE_MAGIC
......
......@@ -56,12 +56,13 @@ enum {
PB_CPUINFO,
PB_USERNS,
PB_NETNS,
PB_BINFMT_MISC, /* 50 */
/* PB_AUTOGEN_STOP */
PB_PAGEMAP_HEAD,
PB_IDS,
PB_SIGACT, /* 50 */
PB_SIGACT,
PB_NETDEV,
PB_REMAP_FPATH,
PB_SK_QUEUES,
......
......@@ -29,6 +29,7 @@
#include "sysfs_parse.h"
#include "protobuf/mnt.pb-c.h"
#include "protobuf/binfmt-misc.pb-c.h"
#define AUTODETECTED_MOUNT "CRIU:AUTOGENERATED"
#define MS_PROPAGATE (MS_SHARED | MS_PRIVATE | MS_UNBINDABLE | MS_SLAVE)
......@@ -1282,8 +1283,90 @@ static int devtmpfs_restore(struct mount_info *pm)
return ret;
}
/*
 * Parse the textual description of one binfmt_misc entry, read line by
 * line from @f, into @bme.
 *
 * Recognised lines:
 *   "enabled"          -> bme->enabled = true
 *   "disabled"         -> nothing to record, enabled keeps its initial value
 *   "offset N"         -> bme->offset / bme->has_offset
 *   "interpreter ..."  -> bme->interpreter
 *   "flags: ..."       -> bme->flags
 *   "extension ...."   -> bme->extension (text after the leading dot)
 *   "magic ..."        -> bme->magic
 *   "mask ..."         -> bme->mask
 *
 * String members are duplicated with xstrdup(); whatever has been
 * duplicated so far stays attached to @bme on failure, so the caller is
 * expected to release these members on both the success and error paths.
 *
 * Returns 0 once the input is exhausted, -1 on a read error, an
 * allocation failure, a malformed offset or an unrecognised line.
 */
static int parse_binfmt_misc_entry(struct bfd *f, BinfmtMiscEntry *bme)
{
	while (1) {
		char *str;

		str = breadline(f);
		if (IS_ERR(str))
			return -1;
		if (!str)
			break;

		if (!strncmp(str, "enabled", 7)) {
			bme->enabled = true;
			continue;
		}

		if (!strncmp(str, "disabled", 8))
			continue;

		if (!strncmp(str, "offset ", 7)) {
			if (sscanf(str + 7, "%i", &bme->offset) != 1)
				return -1;
			bme->has_offset = true;
			continue;
		}

/* Store everything following the @key prefix as a copy in bme->@member. */
#define DUP_EQUAL_AS(key, member)					\
		if (!strncmp(str, key, strlen(key))) {			\
			bme->member = xstrdup(str + strlen(key));	\
			if (!bme->member)				\
				return -1;				\
			continue;					\
		}
		DUP_EQUAL_AS("interpreter ", interpreter)
		DUP_EQUAL_AS("flags: ", flags)
		DUP_EQUAL_AS("extension .", extension)
		DUP_EQUAL_AS("magic ", magic)
		DUP_EQUAL_AS("mask ", mask)
#undef DUP_EQUAL_AS

		/*
		 * This is a parse failure, not a failed system call, so errno
		 * carries no useful information: report it with pr_err()
		 * (which takes the trailing newline) rather than pr_perror().
		 */
		pr_err("binfmt_misc: unsupported feature %s\n", str);
		return -1;
	}

	return 0;
}
static int dump_binfmt_misc_entry(int dfd, char *name, struct cr_img *img)
{
BinfmtMiscEntry bme = BINFMT_MISC_ENTRY__INIT;
struct bfd f;
int ret = -1;
f.fd = openat(dfd, name, O_RDONLY);
if (f.fd < 0) {
pr_perror("binfmt_misc: can't open %s", name);
return -1;
}
if (bfdopenr(&f))
return -1;
if (parse_binfmt_misc_entry(&f, &bme))
goto err;
bme.name = name;
if (pb_write_one(img, &bme, PB_BINFMT_MISC))
goto err;
ret = 0;
err:
free(bme.interpreter);
free(bme.flags);
free(bme.extension);
free(bme.magic);
free(bme.mask);
bclose(&f);
return ret;
}
static int binfmt_misc_dump(struct mount_info *pm)
{
struct cr_img *img;
int fd, ret = -1;
struct dirent *de;
DIR *fdir = NULL;
......@@ -1298,6 +1381,10 @@ static int binfmt_misc_dump(struct mount_info *pm)
return -1;
}
img = open_image(CR_FD_BINFMT_MISC, O_DUMP, pm->s_dev);
if (!img)
goto out;
while ((de = readdir(fdir))) {
if (dir_dots(de))
continue;
......@@ -1306,16 +1393,147 @@ static int binfmt_misc_dump(struct mount_info *pm)
if (!strcmp(de->d_name, "status"))
continue;
pr_err("binfmt_misc isn't empty: %s\n", de->d_name);
goto out;
if (dump_binfmt_misc_entry(fd, de->d_name, img))
goto out;
}
ret = 0;
out:
if (img)
close_image(img);
closedir(fdir);
return ret;
}
/*
 * Register one binfmt_misc entry on the mount at @mp.
 *
 * @buf is the ready-made registration string; it is written to
 * "<mp>/register". If the dumped entry was not enabled, "0" is then
 * written to "<mp>/<name>" (the kernel's interface for disabling an
 * entry, per the binfmt_misc documentation).
 *
 * Returns 0 on success, -1 on failure.
 */
static int restore_binfmt_misc_entry(char *mp, char *buf, BinfmtMiscEntry *bme)
{
	int fd, len, ret = -1;
	char path[PATH_MAX+1];

	snprintf(path, PATH_MAX, "%s/register", mp);

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		pr_perror("binfmt_misc: can't open %s", path);
		return -1;
	}

	len = strlen(buf);

	if (write(fd, buf, len) != len) {
		pr_perror("binfmt_misc: can't write to %s", path);
		goto close;
	}

	if (!bme->enabled) {
		close(fd);
		snprintf(path, PATH_MAX, "%s/%s", mp, bme->name);

		fd = open(path, O_WRONLY);
		/*
		 * open() reports failure with -1, not 0; checking "!fd" would
		 * let a failed open through and then write()/close() fd -1.
		 */
		if (fd < 0) {
			pr_perror("binfmt_misc: can't open %s", path);
			goto out;
		}
		if (write(fd, "0", 1) != 1) {
			pr_perror("binfmt_misc: can't write to %s", path);
			goto close;
		}
	}

	ret = 0;
close:
	close(fd);
out:
	return ret;
}
#define BINFMT_MISC_STR (1920 + 1)

/*
 * Build the registration string for a "magic" type entry into @buf,
 * which must be at least BINFMT_MISC_STR bytes long.
 *
 * @bme->name, @bme->magic and @bme->interpreter must be non-NULL;
 * @bme->mask and @bme->flags are optional.
 *
 * Returns 1 on success and -1 if the entry cannot be represented
 * (string too long, offset out of range, malformed hex dump).
 */
static int make_bfmtm_magic_str(char *buf, BinfmtMiscEntry *bme)
{
	size_t magic_len, mask_len, total, i;

	/*
	 * Format is ":name:type(M):offset:magic:mask:interpreter:flags".
	 * Magic and mask are special fields. Kernel outputs them as
	 * a sequence of hexadecimal numbers (abc -> 616263), and we
	 * dump them without changes. But for registering a new entry
	 * it expects every byte is prepended with \x, i.e. \x61\x62\x63.
	 */
	magic_len = strlen(bme->magic);
	mask_len = bme->mask ? strlen(bme->mask) : 0;

	/*
	 * Two hex digits per byte: an odd length is malformed and would
	 * make the loops below copy the NUL terminator into the output.
	 */
	if ((magic_len & 1) || (mask_len & 1))
		return -1;

	/* Only three characters are reserved for the offset below. */
	if (bme->offset < 0 || bme->offset > 128)
		return -1;

	/*
	 * Every "xx" pair becomes "\xxx", i.e. twice the dumped length.
	 * The fixed part is seven colons plus the 'M' type character.
	 */
	total = strlen(bme->name) + 3 /* offset <= 128 */ + 2 * magic_len
		+ 2 * mask_len + strlen(bme->interpreter)
		+ (bme->flags ? strlen(bme->flags) : 0) + strlen("::M:::::");

	if (total > BINFMT_MISC_STR - 1)
		return -1;

	buf += sprintf(buf, ":%s:M:%d:", bme->name, bme->offset);

	for (i = 0; i < magic_len; i += 2)
		buf += sprintf(buf, "\\x%c%c", bme->magic[i], bme->magic[i + 1]);

	buf += sprintf(buf, ":");

	for (i = 0; i < mask_len; i += 2)
		buf += sprintf(buf, "\\x%c%c", bme->mask[i], bme->mask[i + 1]);

	sprintf(buf, ":%s:%s", bme->interpreter, bme->flags ? bme->flags : "");

	return 1;
}
/*
 * Restore the entries of a binfmt_misc mount.
 *
 * Reads BinfmtMiscEntry objects from the CR_FD_BINFMT_MISC image that
 * belongs to @mi->s_dev, turns each one into a registration string and
 * registers it on @mi->mountpoint via restore_binfmt_misc_entry().
 *
 * Returns 0 on success (including when the image holds no entries) and
 * a negative value on failure.
 */
static int binfmt_misc_restore(struct mount_info *mi)
{
	struct cr_img *img;
	char *buf;
	int ret = -1;

	buf = xmalloc(BINFMT_MISC_STR);
	if (!buf)
		return -1;

	img = open_image(CR_FD_BINFMT_MISC, O_RSTR, mi->s_dev);
	if (!img)
		goto free_buf;

	if (empty_image(img)) {
		/*
		 * Nothing was dumped for this mount, so there is nothing to
		 * register. The image object still has to be released.
		 * NOTE(review): treating an empty image as success assumes
		 * it means "no entries" rather than a broken dump -- confirm.
		 */
		ret = 0;
		goto close_img;
	}

	ret = 0;
	while (ret == 0) {
		BinfmtMiscEntry *bme;

		ret = pb_read_one_eof(img, &bme, PB_BINFMT_MISC);
		if (ret <= 0)
			break;

		/* :name:type:offset:magic/extension:mask:interpreter:flags */
		if ((!bme->magic && !bme->extension) || !bme->interpreter) {
			/* Not a syscall failure, so errno is meaningless here. */
			pr_err("binfmt_misc: bad dump\n");
			ret = -1;
		} else if (bme->magic) {
			ret = make_bfmtm_magic_str(buf, bme);
			if (ret < 0)
				pr_err("binfmt_misc: can't build registration string for %s\n",
				       bme->name);
		} else {
			/* :name:E::extension::interpreter:flags */
			ret = snprintf(buf, BINFMT_MISC_STR, ":%s:E::%s::%s:%s",
				       bme->name, bme->extension, bme->interpreter,
				       bme->flags ? bme->flags : "");
			/* A truncated string must not be registered. */
			if (ret < 0 || ret >= BINFMT_MISC_STR) {
				pr_err("binfmt_misc: registration string for %s is too long\n",
				       bme->name);
				ret = -1;
			}
		}

		if (ret > 0)
			ret = restore_binfmt_misc_entry(mi->mountpoint, buf, bme);

		binfmt_misc_entry__free_unpacked(bme, NULL);
	}

close_img:
	close_image(img);
free_buf:
	free(buf);
	return ret;
}
static int fusectl_dump(struct mount_info *pm)
{
int fd, ret = -1;
......@@ -1406,6 +1624,7 @@ static struct fstype fstypes[32] = {
.name = "binfmt_misc",
.code = FSTYPE__BINFMT_MISC,
.dump = binfmt_misc_dump,
.restore = binfmt_misc_restore,
}, {
.name = "tmpfs",
.code = FSTYPE__TMPFS,
......
......@@ -62,6 +62,7 @@
#include "protobuf/cpuinfo.pb-c.h"
#include "protobuf/userns.pb-c.h"
#include "protobuf/seccomp.pb-c.h"
#include "protobuf/binfmt-misc.pb-c.h"
struct cr_pb_message_desc cr_pb_descs[PB_MAX];
......
......@@ -56,6 +56,7 @@ proto-obj-y += userns.o
proto-obj-y += google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto
proto-obj-y += opts.o
proto-obj-y += seccomp.o
proto-obj-y += binfmt-misc.o
CFLAGS += -I$(obj)/
......
// One registered binfmt_misc handler, as dumped from its entry file on a
// binfmt_misc mount. An entry is either of the "magic" or the "extension"
// type; restore rejects an entry that has neither field set.
message binfmt_misc_entry {
// Entry name, i.e. the file name of the entry on the binfmt_misc mount.
required string name = 1;
// True when the entry file reported "enabled".
required bool enabled = 2;
// Text following "interpreter " in the entry file.
required string interpreter = 3;
// Text following "flags: " in the entry file.
optional string flags = 4;
// Text following "extension ." in the entry file (without the leading dot).
optional string extension = 5;
// Text following "magic " in the entry file; stored unchanged, as a string
// of hexadecimal digits.
optional string magic = 6;
// Text following "mask " in the entry file; same representation as magic.
optional string mask = 7;
// Value following "offset " in the entry file.
optional int32 offset = 8;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment