Commit d6a1cd0f authored by Andrey Vagin, committed by Cyrill Gorcunov

restore: Learn to work with shared struct file-s

Several processes can share a single struct file; such files can be found by their "object IDs".
A shared file descriptor is opened in one process and sent to the others via a unix socket.

The procedure of restoring files consists of four stages.
* Collect data about all file descriptors
  At this stage we find the process which will restore a file descriptor and
  create a list of the processes which should receive this descriptor.

* Create datagram unix sockets
  If a file descriptor is to be received, a unix socket is created
  in its place.

* Open file descriptors
  The process with the lowest pid opens the file and sends the file
  descriptor to every process that is waiting for it.

* Receive file descriptors.

When designing this algorithm, we wanted to minimize the number
of context switches. The number of context switches is proportional to the
number of processes.
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
parent b5cc5fc3
......@@ -94,7 +94,7 @@ err_bogus_mapping:
static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
bool do_close, unsigned long pos, unsigned int flags,
struct cr_fdset *cr_fdset)
char *id, struct cr_fdset *cr_fdset)
{
struct fdinfo_entry e;
char fd_str[128];
......@@ -120,6 +120,8 @@ static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
e.flags = flags;
e.pos = pos;
e.addr = fd_name;
if (id)
memcpy(e.id, id, FD_ID_SIZE);
pr_info("fdinfo: type: %2x len: %2x flags: %4x pos: %8x addr: %16lx\n",
type, len, flags, pos, fd_name);
......@@ -143,7 +145,7 @@ static int dump_cwd(char *path, struct cr_fdset *cr_fdset)
return -1;
}
return dump_one_reg_file(FDINFO_FD, ~0L, fd, 1, 0, 0, cr_fdset);
return dump_one_reg_file(FDINFO_FD, ~0L, fd, 1, 0, 0, NULL, cr_fdset);
}
......@@ -231,7 +233,7 @@ err:
}
static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long pos,
unsigned int flags, struct cr_fdset *cr_fdset)
unsigned int flags, char *id, struct cr_fdset *cr_fdset)
{
struct statfs stfs_buf;
struct stat st_buf;
......@@ -269,7 +271,7 @@ static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long p
S_ISDIR(st_buf.st_mode) ||
(S_ISCHR(st_buf.st_mode) && major(st_buf.st_rdev) == MEM_MAJOR))
return dump_one_reg_file(FDINFO_FD, atol(fd_name),
fd, 1, pos, flags, cr_fdset);
fd, 1, pos, flags, id, cr_fdset);
if (S_ISFIFO(st_buf.st_mode)) {
if (fstatfs(fd, &stfs_buf) < 0) {
......@@ -290,9 +292,11 @@ out_close:
return err;
}
static int read_fd_params(pid_t pid, char *fd, unsigned long *pos, unsigned int *flags)
static int read_fd_params(pid_t pid, char *fd, unsigned long *pos,
unsigned int *flags, char *id)
{
FILE *file;
unsigned int f;
file = fopen_proc("%d/fdinfo/%s", "r", pid, fd);
if (!file) {
......@@ -300,10 +304,11 @@ static int read_fd_params(pid_t pid, char *fd, unsigned long *pos, unsigned int
return -1;
}
fscanf(file, "pos:\t%li\nflags:\t%o\n", pos, flags);
fscanf(file, "pos:\t%li\nflags:\t%o\nid:\t%s\n", pos, flags, id);
fclose(file);
pr_info("%d fdinfo %s: pos: %16lx flags: %16lx\n", pid, fd, *pos, *flags);
pr_info("%d fdinfo %s: pos: %16lx flags: %16o id %s\n",
pid, fd, *pos, *flags, id);
return 0;
}
......@@ -314,6 +319,7 @@ static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset)
struct dirent *de;
unsigned long pos;
unsigned int flags;
char id[FD_ID_SIZE];
DIR *fd_dir;
pr_info("\n");
......@@ -336,9 +342,10 @@ static int dump_task_files(pid_t pid, struct cr_fdset *cr_fdset)
while ((de = readdir(fd_dir))) {
if (de->d_name[0] == '.')
continue;
if (read_fd_params(pid, de->d_name, &pos, &flags))
if (read_fd_params(pid, de->d_name, &pos, &flags, id))
return -1;
if (dump_one_fd(pid_fd_dir, dirfd(fd_dir), de->d_name, pos, flags, cr_fdset))
if (dump_one_fd(pid_fd_dir, dirfd(fd_dir), de->d_name,
pos, flags, id, cr_fdset))
return -1;
}
......@@ -393,7 +400,7 @@ static int dump_task_mappings(pid_t pid, struct list_head *vma_area_list, struct
ret = dump_one_reg_file(FDINFO_MAP,
vma->start,
vma_area->vm_file_fd,
0, 0, flags,
0, 0, flags, NULL,
cr_fdset);
if (ret)
goto err;
......
......@@ -382,6 +382,9 @@ static int prepare_shared(int ps_fd)
return -1;
}
if (prepare_fdinfo_global())
return -1;
while (1) {
struct pstree_entry e;
int ret;
......@@ -401,6 +404,9 @@ static int prepare_shared(int ps_fd)
if (prepare_pipes_pid(e.pid))
return -1;
if (prepare_fd_pid(e.pid))
return -1;
lseek(ps_fd, e.nr_children * sizeof(u32) + e.nr_threads * sizeof(u32), SEEK_CUR);
}
......
This diff is collapsed.
......@@ -2,6 +2,8 @@
#define FILES_H_
extern int prepare_fds(int pid);
extern int prepare_fd_pid(int pid);
extern int prepare_fdinfo_global(void);
extern int try_fixup_file_map(int pid, struct vma_entry *vma_entry, int fd);
#endif /* FILES_H_ */
......@@ -21,12 +21,15 @@
#define PAGE_RSS 1
#define PAGE_ANON 2
#define FD_ID_SIZE 50
/*
 * Fixed-size header describing one dumped file: dump_one_reg_file() fills
 * an instance of it for file descriptors, for the current working directory
 * and for file-backed memory mappings.
 *
 * The layout is declared __packed and ends in a zero-length array, so any
 * change to field order or width changes the record layout.
 */
struct fdinfo_entry {
/* Record kind supplied by the dumper, e.g. FDINFO_FD or FDINFO_MAP. */
u8 type;
/* NOTE(review): presumably the byte length of the trailing name[] -- confirm. */
u8 len;
/*
 * Open flags of the file. NOTE(review): the dumper passes an unsigned int
 * here, so anything above 16 bits is dropped on assignment -- confirm this
 * is intended.
 */
u16 flags;
/*
 * File position. NOTE(review): the dumper passes an unsigned long, so
 * offsets that do not fit in 32 bits are truncated -- confirm.
 */
u32 pos;
/*
 * Where the file belongs in the task: the numeric fd for descriptors,
 * ~0L for the cwd, or the start address of the VMA for mappings.
 */
u64 addr;
/*
 * Identifier text read from the "id:" line of /proc/<pid>/fdinfo/<fd>.
 * The dumper copies FD_ID_SIZE bytes into it only when an id is supplied
 * (cwd and mapping records pass NULL).
 */
char id[FD_ID_SIZE];
/*
 * Zero-length trailing array marking where variable-length data follows
 * the header. NOTE(review): presumably the file path -- confirm.
 */
u8 name[0];
} __packed;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment