Commit bff52ba9 authored by Cyrill Gorcunov, committed by Pavel Emelyanov

inotify: Add checkpoint/restore v2

v2:
 - open_mount is cleaned up
 - the byte-stream hex conversion is left untouched, since
   strtol would flip the numbers into little-endian order
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent ed59bf00
...@@ -54,6 +54,7 @@ OBJS += shmem.o ...@@ -54,6 +54,7 @@ OBJS += shmem.o
OBJS += eventfd.o OBJS += eventfd.o
OBJS += eventpoll.o OBJS += eventpoll.o
OBJS += mount.o OBJS += mount.o
OBJS += inotify.o
DEPS := $(patsubst %.o,%.d,$(OBJS)) DEPS := $(patsubst %.o,%.d,$(OBJS))
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "sk-inet.h" #include "sk-inet.h"
#include "eventfd.h" #include "eventfd.h"
#include "eventpoll.h" #include "eventpoll.h"
#include "inotify.h"
#ifndef CONFIG_X86_64 #ifndef CONFIG_X86_64
# error No x86-32 support yet # error No x86-32 support yet
...@@ -329,6 +330,9 @@ static int do_dump_gen_file(const struct fd_parms *p, int lfd, ...@@ -329,6 +330,9 @@ static int do_dump_gen_file(const struct fd_parms *p, int lfd,
case FDINFO_EVENTPOLL: case FDINFO_EVENTPOLL:
ret = dump_one_eventpoll(lfd, e.id, p); ret = dump_one_eventpoll(lfd, e.id, p);
break; break;
case FDINFO_INOTIFY:
ret = dump_one_inotify(lfd, e.id, p);
break;
default: default:
ret = dump_one_reg_file(lfd, e.id, p); ret = dump_one_reg_file(lfd, e.id, p);
break; break;
...@@ -475,6 +479,13 @@ static int dump_eventpoll(struct fd_parms *p, int lfd, const struct cr_fdset *se ...@@ -475,6 +479,13 @@ static int dump_eventpoll(struct fd_parms *p, int lfd, const struct cr_fdset *se
return do_dump_gen_file(p, lfd, set); return do_dump_gen_file(p, lfd, set);
} }
/*
 * Dumps an inotify descriptor: generates the file id from the device,
 * inode and position found in @p, marks the entry as FDINFO_INOTIFY
 * and passes it on to do_dump_gen_file().
 *
 * Returns whatever do_dump_gen_file() returns.
 */
static int dump_inotify(struct fd_parms *p, int lfd, const struct cr_fdset *set)
{
p->id = MAKE_FD_GENID(p->stat.st_dev, p->stat.st_ino, p->pos);
p->type = FDINFO_INOTIFY;
return do_dump_gen_file(p, lfd, set);
}
static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags, static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags,
const struct cr_fdset *cr_fdset) const struct cr_fdset *cr_fdset)
{ {
...@@ -502,6 +513,8 @@ static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags, ...@@ -502,6 +513,8 @@ static int dump_one_file(pid_t pid, int fd, int lfd, char fd_flags,
return dump_eventfd(&p, lfd, cr_fdset); return dump_eventfd(&p, lfd, cr_fdset);
else if (is_eventpoll_link(lfd)) else if (is_eventpoll_link(lfd))
return dump_eventpoll(&p, lfd, cr_fdset); return dump_eventpoll(&p, lfd, cr_fdset);
else if (is_inotify_link(lfd))
return dump_inotify(&p, lfd, cr_fdset);
} }
if (S_ISREG(p.stat.st_mode) || if (S_ISREG(p.stat.st_mode) ||
......
...@@ -42,6 +42,8 @@ ...@@ -42,6 +42,8 @@
#include "crtools.h" #include "crtools.h"
#include "namespaces.h" #include "namespaces.h"
#include "shmem.h" #include "shmem.h"
#include "mount.h"
#include "inotify.h"
static struct task_entries *task_entries; static struct task_entries *task_entries;
...@@ -170,6 +172,12 @@ static int prepare_shared(void) ...@@ -170,6 +172,12 @@ static int prepare_shared(void)
if (collect_eventpoll()) if (collect_eventpoll())
return -1; return -1;
if (collect_mount_info())
return -1;
if (collect_inotify())
return -1;
list_for_each_entry(pi, &tasks, list) { list_for_each_entry(pi, &tasks, list) {
ret = prepare_shmem_pid(pi->pid); ret = prepare_shmem_pid(pi->pid);
if (ret < 0) if (ret < 0)
......
...@@ -64,6 +64,7 @@ static char *fdtype2s(u8 type) ...@@ -64,6 +64,7 @@ static char *fdtype2s(u8 type)
[FDINFO_UNIXSK] = "usk", [FDINFO_UNIXSK] = "usk",
[FDINFO_EVENTFD] = "efd", [FDINFO_EVENTFD] = "efd",
[FDINFO_EVENTPOLL] = "epl", [FDINFO_EVENTPOLL] = "epl",
[FDINFO_INOTIFY] = "ify",
}; };
if (type > FDINFO_UND && type < FD_INFO_MAX) if (type > FDINFO_UND && type < FD_INFO_MAX)
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "sk-inet.h" #include "sk-inet.h"
#include "eventfd.h" #include "eventfd.h"
#include "eventpoll.h" #include "eventpoll.h"
#include "inotify.h"
struct cr_options opts; struct cr_options opts;
...@@ -86,6 +87,20 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = { ...@@ -86,6 +87,20 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
.show = show_eventpoll_tfd, .show = show_eventpoll_tfd,
}, },
/* inotify descriptors */
[CR_FD_INOTIFY] = {
.fmt = FMT_FNAME_INOTIFY,
.magic = INOTIFY_MAGIC,
.show = show_inotify,
},
/* inotify watch descriptors */
[CR_FD_INOTIFY_WD] = {
.fmt = FMT_FNAME_INOTIFY_WD,
.magic = INOTIFY_WMAGIC,
.show = show_inotify_wd,
},
/* core data, such as regs and vmas and such */ /* core data, such as regs and vmas and such */
[CR_FD_CORE] = { [CR_FD_CORE] = {
.fmt = FMT_FNAME_CORE, .fmt = FMT_FNAME_CORE,
......
...@@ -58,6 +58,8 @@ enum { ...@@ -58,6 +58,8 @@ enum {
CR_FD_EVENTFD, CR_FD_EVENTFD,
CR_FD_EVENTPOLL, CR_FD_EVENTPOLL,
CR_FD_EVENTPOLL_TFD, CR_FD_EVENTPOLL_TFD,
CR_FD_INOTIFY,
CR_FD_INOTIFY_WD,
_CR_FD_GLOB_TO, _CR_FD_GLOB_TO,
CR_FD_MAX CR_FD_MAX
...@@ -126,6 +128,8 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX]; ...@@ -126,6 +128,8 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
#define FMT_FNAME_EVENTFD "eventfd.img" #define FMT_FNAME_EVENTFD "eventfd.img"
#define FMT_FNAME_EVENTPOLL "eventpoll.img" #define FMT_FNAME_EVENTPOLL "eventpoll.img"
#define FMT_FNAME_EVENTPOLL_TFD "eventpoll-tfd.img" #define FMT_FNAME_EVENTPOLL_TFD "eventpoll-tfd.img"
#define FMT_FNAME_INOTIFY "inotify.img"
#define FMT_FNAME_INOTIFY_WD "inotify-wd.img"
#define FMT_FNAME_ITIMERS "itimers-%d.img" #define FMT_FNAME_ITIMERS "itimers-%d.img"
#define FMT_FNAME_CREDS "creds-%d.img" #define FMT_FNAME_CREDS "creds-%d.img"
#define FMT_FNAME_UTSNS "utsns-%d.img" #define FMT_FNAME_UTSNS "utsns-%d.img"
......
...@@ -36,6 +36,8 @@ ...@@ -36,6 +36,8 @@
#define EVENTFD_MAGIC 0x44523722 /* Anapa */ #define EVENTFD_MAGIC 0x44523722 /* Anapa */
#define EVENTPOLL_MAGIC 0x45023858 /* Krasnodar */ #define EVENTPOLL_MAGIC 0x45023858 /* Krasnodar */
#define EVENTPOLL_TFD_MAGIC 0x44433746 /* Novorossiysk */ #define EVENTPOLL_TFD_MAGIC 0x44433746 /* Novorossiysk */
#define INOTIFY_MAGIC 0x48424431 /* Volgograd */
#define INOTIFY_WMAGIC 0x54562009 /* Svetlogorsk (Rauschen) */
#define PIPEFS_MAGIC 0x50495045 #define PIPEFS_MAGIC 0x50495045
...@@ -47,6 +49,7 @@ enum fd_types { ...@@ -47,6 +49,7 @@ enum fd_types {
FDINFO_UNIXSK, FDINFO_UNIXSK,
FDINFO_EVENTFD, FDINFO_EVENTFD,
FDINFO_EVENTPOLL, FDINFO_EVENTPOLL,
FDINFO_INOTIFY,
FD_INFO_MAX FD_INFO_MAX
}; };
...@@ -109,6 +112,21 @@ struct eventpoll_file_entry { ...@@ -109,6 +112,21 @@ struct eventpoll_file_entry {
fown_t fown; fown_t fown;
} __packed; } __packed;
/*
 * One inotify watch as stored in the inotify-wd image.
 * The values are parsed from the "wd:" lines of the descriptor's fdinfo.
 */
struct inotify_wd_entry {
u32 id; /* id of the inotify file this watch belongs to */
u64 i_ino; /* "ino" field of the fdinfo line */
u32 mask; /* "mask" field, passed to inotify_add_watch() on restore */
u32 s_dev; /* "sdev" field, used to find the mount on restore */
u32 wd; /* watch descriptor number which has to be reproduced on restore */
fh_t f_handle; /* file handle of the watched target */
} __packed;
/* One inotify file as stored in the inotify image. */
struct inotify_file_entry {
u32 id; /* file id, referenced by inotify_wd_entry::id */
u16 flags; /* file flags, passed to inotify_init1() on restore */
fown_t fown; /* file owner settings, re-applied with restore_fown() */
} __packed;
struct fdinfo_entry { struct fdinfo_entry {
u32 fd; u32 fd;
u8 type; u8 type;
......
#ifndef INOTIFY_H__
#define INOTIFY_H__
#include <sys/types.h>
#include <unistd.h>
#include "compiler.h"
#include "types.h"
#include "files.h"
#include "crtools.h"
/* Returns 1 if @lfd links to "anon_inode:inotify", 0 otherwise */
extern int is_inotify_link(int lfd);
/* Writes the inotify file entry and all of its watches into the images */
extern int dump_one_inotify(int lfd, u32 id, const struct fd_parms *p);
/* Reads inotify images and registers the collected files for restore */
extern int collect_inotify(void);
/* Print the contents of the inotify-wd and inotify images */
extern void show_inotify_wd(int fd_inotify_wd, struct cr_options *o);
extern void show_inotify(int fd_inotify, struct cr_options *o);
#endif /* INOTIFY_H__ */
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
struct proc_mountinfo; struct proc_mountinfo;
extern int open_mnt_root(unsigned int s_dev, struct proc_mountinfo *mntinfo, int nr_mntinfo); extern int open_mount(unsigned int s_dev);
extern int collect_mount_info(void);
#endif /* MOUNT_H__ */ #endif /* MOUNT_H__ */
...@@ -216,4 +216,11 @@ struct f_owner_ex { ...@@ -216,4 +216,11 @@ struct f_owner_ex {
}; };
#endif #endif
/*
 * File handle.
 *
 * Filled from the fhandle-bytes, fhandle-type and f_handle fields of
 * an inotify fdinfo line and handed over to sys_open_by_handle_at()
 * on restore.
 */
typedef struct {
u32 bytes;
u32 type;
u64 __handle[16]; /* raw handle payload, decoded from a hex string */
} fh_t;
#endif /* CR_TYPES_H_ */ #endif /* CR_TYPES_H_ */
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
#include <utime.h>
#include <dirent.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/inotify.h>
#include <sys/vfs.h>
#include <sys/wait.h>
#include <sys/poll.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <aio.h>
#include "compiler.h"
#include "types.h"
#include "inotify.h"
#include "syscall.h"
#include "crtools.h"
#include "mount.h"
#include "image.h"
#include "util.h"
#include "files.h"
#include "file-ids.h"
#include "log.h"
#include "list.h"
#include "lock.h"
/* In-memory copy of one watch entry read from the inotify-wd image */
struct inotify_wd_info {
struct list_head list; /* link in inotify_file_info::marks */
struct inotify_wd_entry iwe;
};
/* In-memory state of one inotify file read from the inotify image */
struct inotify_file_info {
struct list_head list; /* link in info_head */
struct inotify_file_entry ife;
struct list_head marks; /* inotify_wd_info entries with the same id */
struct file_desc d; /* registered with file_desc_add() */
};
static LIST_HEAD(info_head);
static char fdinfo_buf[PAGE_SIZE];
/*
 * Checks if file descriptor @lfd is an inotify one.
 *
 * Returns 1 when /proc/self/fd/@lfd links to "anon_inode:inotify" and
 * 0 otherwise, including the case when the link can't be read.
 */
int is_inotify_link(int lfd)
{
	char link[PATH_MAX], path[32];
	ssize_t ret;

	snprintf(path, sizeof(path), "/proc/self/fd/%d", lfd);

	/*
	 * readlink() doesn't NUL-terminate the result and may fill the
	 * whole buffer, so keep one byte spare for the terminator.
	 */
	ret = readlink(path, link, sizeof(link) - 1);
	if (ret < 0) {
		pr_perror("Can't read link of fd %d", lfd);
		return 0;
	}
	link[ret] = '\0';

	return strcmp(link, "anon_inode:inotify") == 0;
}
void show_inotify_wd(int fd_inotify_wd, struct cr_options *o)
{
struct inotify_wd_entry e;
pr_img_head(CR_FD_INOTIFY_WD);
while (1) {
int ret;
ret = read_img_eof(fd_inotify_wd, &e);
if (ret <= 0)
goto out;
pr_msg("inotify-wd: id 0x%08x 0x%08x s_dev 0x%08x i_ino 0x%016lx mask 0x%08x "
"[fhandle] 0x%08x 0x%08x 0x%016lx:0x%016lx ...\n",
e.id, e.wd, e.s_dev, e.i_ino, e.mask,
e.f_handle.bytes, e.f_handle.type,
e.f_handle.__handle[0],
e.f_handle.__handle[1]);
}
out:
pr_img_tail(CR_FD_INOTIFY_WD);
}
void show_inotify(int fd_inotify, struct cr_options *o)
{
struct inotify_file_entry e;
pr_img_head(CR_FD_INOTIFY);
while (1) {
int ret;
ret = read_img_eof(fd_inotify, &e);
if (ret <= 0)
goto out;
pr_msg("inotify: id 0x%08x flags 0x%08x\n\t", e.id, e.flags);
show_fown_cont(&e.fown);
pr_msg("\n");
}
out:
pr_img_tail(CR_FD_INOTIFY);
}
/*
 * Converts one ASCII hex digit (0-9, A-F, a-f) into its numeric value.
 * Any other character yields 0.
 */
static char nybble(const char n)
{
	char value = 0;

	if ('0' <= n && n <= '9')
		value = n - '0';
	else if ('A' <= n && n <= 'F')
		value = 10 + (n - 'A');
	else if ('a' <= n && n <= 'f')
		value = 10 + (n - 'a');

	return value;
}
/*
 * Decodes the hex string @tok into the raw bytes of @f->__handle.
 *
 * The handle is zeroed first and leading spaces in @tok are skipped.
 * Every pair of characters produces one byte, in the order they appear
 * in the string. Decoding stops at the end of the string or once the
 * handle is full; a trailing single digit becomes the high nybble of
 * the last byte.
 */
static void parse_fhandle_encoded(char *tok, fh_t *f)
{
	char *out = (char *)f->__handle;
	int nr = 0;

	memzero(out, sizeof(f->__handle));

	for (; *tok == ' '; tok++)
		;

	for (; *tok && nr < sizeof(f->__handle); tok += 2) {
		out[nr++] = (nybble(tok[0]) << 4) | nybble(tok[1]);
		if (!tok[1])
			break;
	}
}
/*
 * Dumps the inotify descriptor @lfd.
 *
 * One inotify_file_entry is written into the CR_FD_INOTIFY image, and
 * one inotify_wd_entry per "wd:" line of /proc/self/fdinfo/@lfd is
 * written into the CR_FD_INOTIFY_WD image.
 *
 * @lfd: local descriptor of the inotify file
 * @id:  file id stored in both kinds of entries
 * @p:   descriptor parameters (flags, owner, original fd number)
 *
 * Returns 0 on success, -1 on failure.
 */
int dump_one_inotify(int lfd, u32 id, const struct fd_parms *p)
{
	struct inotify_file_entry ie;
	struct inotify_wd_entry we;
	int image_fd, image_wd;
	int ret = -1, fdinfo;
	char *tok, *pos;

	image_fd = fdset_fd(glob_fdset, CR_FD_INOTIFY);
	image_wd = fdset_fd(glob_fdset, CR_FD_INOTIFY_WD);

	pr_info("Dumping inotify %d with id 0x%08x\n", lfd, id);

	ie.id = id;
	ie.flags = p->flags;
	ie.fown = p->fown;

	we.id = id;

	/* The shared buffer first holds the path, then the file contents */
	snprintf(fdinfo_buf, sizeof(fdinfo_buf), "/proc/self/fdinfo/%d", lfd);
	fdinfo = open(fdinfo_buf, O_RDONLY);
	if (fdinfo < 0) {
		pr_perror("Can't open %d (%d)", p->fd, lfd);
		return -1;
	}

	/* Leave room for the terminator, read() doesn't add one */
	ret = read(fdinfo, fdinfo_buf, sizeof(fdinfo_buf) - 1);
	if (ret <= 0) {
		/* Report before close() gets a chance to change errno */
		pr_perror("Reading inotify from %d (%d) failed", p->fd, lfd);
		close(fdinfo);
		return -1;
	}
	close(fdinfo);

	/*
	 * The buffer is static and reused between calls: without the
	 * terminator the string functions below would run into bytes
	 * left over from a previous, longer fdinfo.
	 */
	fdinfo_buf[ret] = '\0';

	ret = -1;

	if (write_img(image_fd, &ie))
		goto err;

	pos = strstr(fdinfo_buf, "wd:");
	if (!pos)
		goto parse_error;

	tok = strtok(pos, "\n");
	while (tok) {
		pr_debug("Line: `%s'\n", tok);
		ret = sscanf(tok,
			     "wd: %8d ino: %16lx, sdev: %8x mask %8x "
			     "fhandle-bytes: %8x fhandle-type: %8x f_handle: ",
			     &we.wd, &we.i_ino, &we.s_dev, &we.mask,
			     &we.f_handle.bytes, &we.f_handle.type);
		if (ret != 6) {
			pr_err("Inotify fdinfo format mismatch #%d\n", ret);
			goto parse_error;
		}

		pos = strstr(tok, "f_handle: ");
		if (!pos)
			goto parse_error;
		/* Skip the "f_handle: " prefix (10 characters) */
		tok = pos + 10;
		parse_fhandle_encoded(tok, &we.f_handle);

		pr_info("inotify: id 0x%08x flags 0x%08x wd 0x%08x s_dev 0x%08x i_ino 0x%16lx mask 0x%08x\n",
			ie.id, ie.flags, we.wd, we.s_dev, we.i_ino, we.mask);
		pr_info("\t[fhandle] bytes 0x%08x type 0x%08x __handle 0x%016lx:0x%016lx\n",
			we.f_handle.bytes, we.f_handle.type,
			we.f_handle.__handle[0], we.f_handle.__handle[1]);

		if (write_img(image_wd, &we))
			goto err;

		tok = strtok(NULL, "\n");
	}

	ret = 0;
err:
	return ret;

parse_error:
	/* sscanf() may have left a non-negative count in ret */
	ret = -1;
	pr_err("Incorrect format in inotify fdinfo %d (%d)\n", p->fd, lfd);
	goto err;
}
/*
 * Restores a single watch @iwe on the inotify descriptor @inotify_fd.
 *
 * The watched target is opened by its file handle relative to the
 * mount found for @iwe->s_dev, then watches are added and removed
 * until the returned watch descriptor equals @iwe->wd.
 *
 * Returns 0 on success, -1 on failure.
 */
static int restore_one_inotify(int inotify_fd, struct inotify_wd_entry *iwe)
{
	char path[32];
	int mntfd, ret = -1;
	int wd, target;

	mntfd = open_mount(iwe->s_dev);
	if (mntfd < 0) {
		pr_err("Mount root for 0x%08x not found\n", iwe->s_dev);
		return -1;
	}

	target = sys_open_by_handle_at(mntfd, (void *)&iwe->f_handle, 0);
	if (target < 0) {
		pr_perror("Can't open file handle for 0x%08x:0x%016lx",
			  iwe->s_dev, iwe->i_ino);
		/* The mount descriptor must not leak on this path */
		goto out_mnt;
	}

	snprintf(path, sizeof(path), "/proc/self/fd/%d", target);
	pr_debug("\t\tRestore watch for 0x%08x:0x%016lx\n", iwe->s_dev, iwe->i_ino);

	/*
	 * FIXME The kernel allocates wd-s sequentially,
	 * this is suboptimal, but the kernel doesn't
	 * provide an API for this yet :(
	 */
	wd = 1;
	while (wd >= 0) {
		wd = inotify_add_watch(inotify_fd, path, iwe->mask);
		if (wd < 0) {
			pr_err("Can't add watch for %d with %d\n", inotify_fd, iwe->wd);
			break;
		} else if (wd == iwe->wd) {
			ret = 0;
			break;
		} else if (wd > iwe->wd) {
			/* Already past the wanted number, it can't be reached anymore */
			pr_err("Usorted watch found for %d with %d\n", inotify_fd, iwe->wd);
			break;
		}

		pr_debug("\t\tWatch got %d but %d expected\n", wd, iwe->wd);
		inotify_rm_watch(inotify_fd, wd);
	}

	close(target);
out_mnt:
	close(mntfd);

	return ret;
}
/*
 * Creates a new inotify descriptor for the collected file behind @d,
 * restores all of its watches and then its file owner settings.
 *
 * Returns the new descriptor on success, -1 on failure.
 */
static int open_inotify_fd(struct file_desc *d)
{
	struct inotify_file_info *info;
	struct inotify_wd_info *wd_info;
	int tmp;

	info = container_of(d, struct inotify_file_info, d);

	tmp = inotify_init1(info->ife.flags);
	if (tmp < 0) {
		pr_perror("Can't create inotify for 0x%08x", info->ife.id);
		return -1;
	}

	list_for_each_entry(wd_info, &info->marks, list) {
		pr_info("\tRestore inotify for 0x%08x\n", wd_info->iwe.id);
		if (restore_one_inotify(tmp, &wd_info->iwe)) {
			/*
			 * Bail out right away, the descriptor is closed
			 * and must not be passed to restore_fown().
			 */
			close_safe(&tmp);
			return -1;
		}
	}

	if (restore_fown(tmp, &info->ife.fown)) {
		close_safe(&tmp);
		return -1;
	}

	return tmp;
}
/* Callbacks passed to file_desc_add() for every collected inotify file */
static struct file_desc_ops desc_ops = {
.open = open_inotify_fd,
};
/*
 * Attaches @mark to the collected inotify file which has the same id.
 *
 * Marks are kept sorted by ascending watch descriptor:
 * restore_one_inotify() gives up as soon as the kernel hands out a
 * number bigger than the wanted one, so the restore order must not
 * depend on the order of entries in the image.
 *
 * Returns 0 on success, -1 if no inotify file with that id exists.
 */
static int collect_mark(struct inotify_wd_info *mark)
{
	struct inotify_file_info *p;
	struct inotify_wd_info *m;
	struct list_head *after;

	list_for_each_entry(p, &info_head, list) {
		if (p->ife.id != mark->iwe.id)
			continue;

		/* Find the last mark with a smaller wd, if there is one */
		after = &p->marks;
		list_for_each_entry(m, &p->marks, list) {
			if (m->iwe.wd > mark->iwe.wd)
				break;
			after = &m->list;
		}

		/*
		 * NOTE(review): assumes Linux-style list_add(), i.e. the
		 * new node is linked right after the given one.
		 */
		list_add(&mark->list, after);
		return 0;
	}

	return -1;
}
int collect_inotify(void)
{
struct inotify_file_info *info;
struct inotify_wd_info *mark;
int image_fd = -1, image_wd = -1, ret = -1;
image_fd = open_image_ro(CR_FD_INOTIFY);
if (image_fd < 0)
return -1;
while (1) {
struct inotify_file_entry ife;
ret = read_img_eof(image_fd, &ife);
if (ret < 0)
goto err;
else if (!ret)
break;
info = xmalloc(sizeof(*info));
if (!info)
return -1;
info->ife = ife;
INIT_LIST_HEAD(&info->list);
INIT_LIST_HEAD(&info->marks);
list_add(&info->list, &info_head);
}
image_wd = open_image_ro(CR_FD_INOTIFY_WD);
if (image_wd < 0)
goto err;
while (1) {
int idx;
mark = xmalloc(sizeof(*mark));
if (!mark)
goto err;
ret = read_img_eof(image_wd, &mark->iwe);
if (ret < 0)
goto err;
else if (!ret)
break;
if (collect_mark(mark)) {
ret = -1;
pr_err("Can't find inotify with id 0x%08x\n", mark->iwe.id);
goto err;
}
}
list_for_each_entry(info, &info_head, list) {
pr_info("Collected inotify: id 0x%08x flags 0x%08x\n", info->ife.id, info->ife.flags);
file_desc_add(&info->d, FDINFO_INOTIFY, info->ife.id, &desc_ops);
}
ret = 0;
err:
close_safe(&image_wd);
close_safe(&image_fd);
return ret;
}
...@@ -10,17 +10,14 @@ ...@@ -10,17 +10,14 @@
#include "types.h" #include "types.h"
#include "util.h" #include "util.h"
#include "log.h"
#include "mount.h" #include "mount.h"
#include "proc_parse.h" #include "proc_parse.h"
/* static struct proc_mountinfo *mntinfo;
* Returns path for mount device @s_dev static int nr_mntinfo;
*
* FIXME this is not sufficient in general int open_mount(unsigned int s_dev)
* since mount points can be overmounted but
* works for now.
*/
int open_mnt_root(unsigned int s_dev, struct proc_mountinfo *mntinfo, int nr_mntinfo)
{ {
static int last = 0; static int last = 0;
int i; int i;
...@@ -40,3 +37,19 @@ again: ...@@ -40,3 +37,19 @@ again:
return -ENOENT; return -ENOENT;
} }
/*
 * Allocates room for 64 mountinfo entries and fills it by parsing the
 * mountinfo of the current process. The number of parsed entries is
 * kept in nr_mntinfo.
 *
 * Returns 0 on success, -1 if the allocation fails or if fewer than
 * one entry was parsed.
 */
int collect_mount_info(void)
{
	int parsed;

	nr_mntinfo = 64;
	mntinfo = xmalloc(sizeof(*mntinfo) * nr_mntinfo);
	if (!mntinfo)
		return -1;

	parsed = parse_mountinfo(getpid(), mntinfo, nr_mntinfo);
	nr_mntinfo = parsed;
	if (parsed >= 1)
		return 0;

	pr_err("Parsing mountinfo %d failed\n", getpid());
	return -1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment