Commit d6a1cd0f authored by Andrey Vagin, committed by Cyrill Gorcunov

restore: Learn to work with shared struct file-s

Several processes can share a single struct file; we can find such files by
their "object IDs". A file descriptor is opened in one process and sent to the
others via a unix socket.

The procedure of restoring files consists of four stages.
* Collect data about all file descriptors
  At this stage we find the process which will restore a file descriptor and
  create a list of processes which should receive this descriptor.

* Create datagram unix sockets
  If a file descriptor is to be received, a unix socket is created
  in its place.

* Open file descriptors
  The process with the lowest pid opens the file and sends this file
  descriptor to all the processes waiting for it.

* Receive file descriptors.

When we were designing this algorithm, we wanted to minimize the number
of context switches. The number of context switches is proportional to the
number of processes.
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
parent b5cc5fc3
...@@ -94,7 +94,7 @@ err_bogus_mapping: ...@@ -94,7 +94,7 @@ err_bogus_mapping:
static int dump_one_reg_file(int type, unsigned long fd_name, int lfd, static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
bool do_close, unsigned long pos, unsigned int flags, bool do_close, unsigned long pos, unsigned int flags,
struct cr_fdset *cr_fdset) char *id, struct cr_fdset *cr_fdset)
{ {
struct fdinfo_entry e; struct fdinfo_entry e;
char fd_str[128]; char fd_str[128];
...@@ -120,6 +120,8 @@ static int dump_one_reg_file(int type, unsigned long fd_name, int lfd, ...@@ -120,6 +120,8 @@ static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
e.flags = flags; e.flags = flags;
e.pos = pos; e.pos = pos;
e.addr = fd_name; e.addr = fd_name;
if (id)
memcpy(e.id, id, FD_ID_SIZE);
pr_info("fdinfo: type: %2x len: %2x flags: %4x pos: %8x addr: %16lx\n", pr_info("fdinfo: type: %2x len: %2x flags: %4x pos: %8x addr: %16lx\n",
type, len, flags, pos, fd_name); type, len, flags, pos, fd_name);
...@@ -143,7 +145,7 @@ static int dump_cwd(char *path, struct cr_fdset *cr_fdset) ...@@ -143,7 +145,7 @@ static int dump_cwd(char *path, struct cr_fdset *cr_fdset)
return -1; return -1;
} }
return dump_one_reg_file(FDINFO_FD, ~0L, fd, 1, 0, 0, cr_fdset); return dump_one_reg_file(FDINFO_FD, ~0L, fd, 1, 0, 0, NULL, cr_fdset);
} }
...@@ -231,7 +233,7 @@ err: ...@@ -231,7 +233,7 @@ err:
} }
static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long pos, static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long pos,
unsigned int flags, struct cr_fdset *cr_fdset) unsigned int flags, char *id, struct cr_fdset *cr_fdset)
{ {
struct statfs stfs_buf; struct statfs stfs_buf;
struct stat st_buf; struct stat st_buf;
...@@ -269,7 +271,7 @@ static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long p ...@@ -269,7 +271,7 @@ static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long p
S_ISDIR(st_buf.st_mode) || S_ISDIR(st_buf.st_mode) ||
(S_ISCHR(st_buf.st_mode) && major(st_buf.st_rdev) == MEM_MAJOR)) (S_ISCHR(st_buf.st_mode) && major(st_buf.st_rdev) == MEM_MAJOR))
return dump_one_reg_file(FDINFO_FD, atol(fd_name), return dump_one_reg_file(FDINFO_FD, atol(fd_name),
fd, 1, pos, flags, cr_fdset); fd, 1, pos, flags, id, cr_fdset);
if (S_ISFIFO(st_buf.st_mode)) { if (S_ISFIFO(st_buf.st_mode)) {
if (fstatfs(fd, &stfs_buf) < 0) { if (fstatfs(fd, &stfs_buf) < 0) {
...@@ -290,9 +292,11 @@ out_close: ...@@ -290,9 +292,11 @@ out_close:
return err; return err;
} }
static int read_fd_params(pid_t pid, char *fd, unsigned long *pos, unsigned int *flags) static int read_fd_params(pid_t pid, char *fd, unsigned long *pos,
unsigned int *flags, char *id)
{ {
FILE *file; FILE *file;
unsigned int f;
file = fopen_proc("%d/fdinfo/%s", "r", pid, fd); file = fopen_proc("%d/fdinfo/%s", "r", pid, fd);
if (!file) { if (!file) {
...@@ -300,10 +304,11 @@ static int read_fd_params(pid_t pid, char *fd, unsigned long *pos, unsigned int ...@@ -300,10 +304,11 @@ static int read_fd_params(pid_t pid, char *fd, unsigned long *pos, unsigned int
return -1; return -1;
} }
fscanf(file, "pos:\t%li\nflags:\t%o\n", pos, flags); fscanf(file, "pos:\t%li\nflags:\t%o\nid:\t%s\n", pos, flags, id);
fclose(file); fclose(file);
pr_info("%d fdinfo %s: pos: %16lx flags: %16lx\n", pid, fd, *pos, *flags); pr_info("%d fdinfo %s: pos: %16lx flags: %16o id %s\n",
pid, fd, *pos, *flags, id);
return 0; return 0;
} }
...@@ -314,6 +319,7 @@ static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset) ...@@ -314,6 +319,7 @@ static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset)
struct dirent *de; struct dirent *de;
unsigned long pos; unsigned long pos;
unsigned int flags; unsigned int flags;
char id[FD_ID_SIZE];
DIR *fd_dir; DIR *fd_dir;
pr_info("\n"); pr_info("\n");
...@@ -336,9 +342,10 @@ static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset) ...@@ -336,9 +342,10 @@ static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset)
while ((de = readdir(fd_dir))) { while ((de = readdir(fd_dir))) {
if (de->d_name[0] == '.') if (de->d_name[0] == '.')
continue; continue;
if (read_fd_params(pid, de->d_name, &pos, &flags)) if (read_fd_params(pid, de->d_name, &pos, &flags, id))
return -1; return -1;
if (dump_one_fd(pid_fd_dir, dirfd(fd_dir), de->d_name, pos, flags, cr_fdset)) if (dump_one_fd(pid_fd_dir, dirfd(fd_dir), de->d_name,
pos, flags, id, cr_fdset))
return -1; return -1;
} }
...@@ -393,7 +400,7 @@ static int dump_task_mappings(pid_t pid, struct list_head *vma_area_list, struct ...@@ -393,7 +400,7 @@ static int dump_task_mappings(pid_t pid, struct list_head *vma_area_list, struct
ret = dump_one_reg_file(FDINFO_MAP, ret = dump_one_reg_file(FDINFO_MAP,
vma->start, vma->start,
vma_area->vm_file_fd, vma_area->vm_file_fd,
0, 0, flags, 0, 0, flags, NULL,
cr_fdset); cr_fdset);
if (ret) if (ret)
goto err; goto err;
......
...@@ -382,6 +382,9 @@ static int prepare_shared(int ps_fd) ...@@ -382,6 +382,9 @@ static int prepare_shared(int ps_fd)
return -1; return -1;
} }
if (prepare_fdinfo_global())
return -1;
while (1) { while (1) {
struct pstree_entry e; struct pstree_entry e;
int ret; int ret;
...@@ -401,6 +404,9 @@ static int prepare_shared(int ps_fd) ...@@ -401,6 +404,9 @@ static int prepare_shared(int ps_fd)
if (prepare_pipes_pid(e.pid)) if (prepare_pipes_pid(e.pid))
return -1; return -1;
if (prepare_fd_pid(e.pid))
return -1;
lseek(ps_fd, e.nr_children * sizeof(u32) + e.nr_threads * sizeof(u32), SEEK_CUR); lseek(ps_fd, e.nr_children * sizeof(u32) + e.nr_threads * sizeof(u32), SEEK_CUR);
} }
......
This diff is collapsed.
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
#define FILES_H_ #define FILES_H_
extern int prepare_fds(int pid); extern int prepare_fds(int pid);
extern int prepare_fd_pid(int pid);
extern int prepare_fdinfo_global(void);
extern int try_fixup_file_map(int pid, struct vma_entry *vma_entry, int fd); extern int try_fixup_file_map(int pid, struct vma_entry *vma_entry, int fd);
#endif /* FILES_H_ */ #endif /* FILES_H_ */
...@@ -21,12 +21,15 @@ ...@@ -21,12 +21,15 @@
#define PAGE_RSS 1 #define PAGE_RSS 1
#define PAGE_ANON 2 #define PAGE_ANON 2
#define FD_ID_SIZE 50
struct fdinfo_entry { struct fdinfo_entry {
u8 type; u8 type;
u8 len; u8 len;
u16 flags; u16 flags;
u32 pos; u32 pos;
u64 addr; u64 addr;
char id[FD_ID_SIZE];
u8 name[0]; u8 name[0];
} __packed; } __packed;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment