Commit 227f1771 authored by Pavel Emelyanov, committed by Cyrill Gorcunov

cr: Split dumped pages locations

This actually does two things:

1. The parasite code itself writes to the pages _or_ to the pages_shared file,
   based on a hint given by the main program. This avoids copying shared pages
   in finalize_core.

2. The private pages are moved out of the core file into a separate one. This
   avoids private pages copying in finalize_core.

The goal of this patch is a) to avoid pages copying at all (we still have
one on restore, but fixing this requires Andrey's work on shared memory
dumping) and b) make big blobs with pages be stored in separate files (I
have plans on its format rework and unification).
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
parent 76a0186c
......@@ -1031,25 +1031,19 @@ static struct vma_area *find_vma_by_addr(struct list_head *vma_area_list, unsign
/* kernel expects a special format in core file */
static int finalize_core(pid_t pid, struct list_head *vma_area_list, struct cr_fdset *cr_fdset)
{
int fd_pages, fd_pages_shmem, fd_core;
unsigned long num, num_anon;
int fd_core;
unsigned long num;
struct vma_area *vma_area;
struct vma_entry ve;
ssize_t bytes;
int ret = -1;
u64 va;
pr_info("\n");
pr_info("Finalizing core (pid: %d)\n", pid);
pr_info("----------------------------------------\n");
fd_core = cr_fdset->fds[CR_FD_CORE];
fd_pages = cr_fdset->fds[CR_FD_PAGES];
fd_pages_shmem = cr_fdset->fds[CR_FD_PAGES_SHMEM];
lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
lseek(fd_pages, MAGIC_OFFSET, SEEK_SET);
lseek(fd_pages_shmem, MAGIC_OFFSET, SEEK_SET);
num = 0;
pr_info("Appending VMAs ... ");
......@@ -1072,81 +1066,12 @@ static int finalize_core(pid_t pid, struct list_head *vma_area_list, struct cr_f
pr_info("OK (%li written)\n", num);
num = 0;
num_anon = 0;
pr_info("Appending pages ... ");
while (1) {
bytes = read(fd_pages, &va, sizeof(va));
if (!bytes)
break;
if (bytes != sizeof(va)) {
pr_perror("\nUnable to read VA of page (%li written)", num);
goto err;
}
/* Ending marker */
if (va == 0) {
if (write_img(fd_core, &zero_page_entry))
goto err;
if (write_img(fd_pages_shmem, &zero_page_entry))
goto err;
break;
}
vma_area = find_vma_by_addr(vma_area_list, (unsigned long)va);
if (!vma_area) {
pr_panic("\nA page with address %lx is unknown\n", va);
goto err;
}
/*
* Just in case if someone broke parasite page
* dumper code.
*/
if (!vma_area_is(vma_area, VMA_AREA_REGULAR)) {
pr_panic("\nA page with address %lx has a wrong status\n", va);
goto err;
}
if (vma_area_is(vma_area, VMA_ANON_PRIVATE) ||
vma_area_is(vma_area, VMA_FILE_PRIVATE)) {
bytes = write(fd_core, &va, sizeof(va));
bytes += sendfile(fd_core, fd_pages, NULL, PAGE_SIZE);
if (bytes != sizeof(va) + PAGE_SIZE) {
pr_perror("\nUnable to write VMA_FILE_PRIVATE|VMA_ANON_PRIVATE "
"page (%li, %li written)",
num, num_anon);
goto err;
}
num++;
} else if (vma_area_is(vma_area, VMA_ANON_SHARED)) {
bytes = write(fd_pages_shmem, &va, sizeof(va));
bytes += sendfile(fd_pages_shmem, fd_pages, NULL, PAGE_SIZE);
if (bytes != sizeof(va) + PAGE_SIZE) {
pr_perror("\nUnable to write VMA_ANON_SHARED "
"page (%li, %li written)",
num, num_anon);
goto err;
}
num_anon++;
} else {
pr_warning("Unexpected VMA area found\n");
pr_info_vma(vma_area);
lseek(fd_pages, PAGE_SIZE, SEEK_CUR);
}
}
ret = 0;
pr_info("OK (%li written)\n", num + num_anon);
err:
pr_info("----------------------------------------\n");
return ret;
return 0;
err_strno:
err:
pr_perror("Error catched");
goto err;
return -1;
}
static int dump_task_thread(pid_t pid, struct cr_fdset *cr_fdset)
......
......@@ -624,61 +624,59 @@ static inline bool should_restore_page(int pid, unsigned long va)
return si->pid == pid;
}
/*
* FIXME avoid this pages copying
*/
static int fixup_pages_data(int pid, int fd)
{
int shfd;
int pgfd, ret;
u64 va;
pr_info("%d: Reading shmem pages img\n", pid);
shfd = open_image_ro(CR_FD_PAGES_SHMEM, pid);
if (shfd < 0)
pgfd = open_image_ro(CR_FD_PAGES, pid);
if (pgfd < 0)
return -1;
/*
* Find out the last page, which must be a zero page.
*/
lseek(fd, -sizeof(struct page_entry), SEEK_END);
read(fd, &va, sizeof(va));
if (va) {
pr_panic("Zero-page expected but got %lx\n", (unsigned long)va);
while (1) {
ret = read_img(pgfd, &va);
if (ret < 0)
return -1;
}
/*
* Since we're to update pages we suppress old zero-page
* and will write new one at the end.
*/
lseek(fd, -sizeof(struct page_entry), SEEK_END);
if (va == 0)
break;
while (1) {
int ret;
write(fd, &va, sizeof(va));
sendfile(fd, pgfd, NULL, PAGE_SIZE);
}
ret = read(shfd, &va, sizeof(va));
if (ret == 0)
break;
close(pgfd);
if (ret < 0 || ret != sizeof(va)) {
pr_perror("%d: Can't read virtual address", pid);
pgfd = open_image_ro(CR_FD_PAGES_SHMEM, pid);
if (pgfd < 0)
return -1;
while (1) {
ret = read_img(pgfd, &va);
if (ret < 0)
return -1;
}
if (va == 0)
break;
if (!should_restore_page(pid, va)) {
lseek(shfd, PAGE_SIZE, SEEK_CUR);
lseek(pgfd, PAGE_SIZE, SEEK_CUR);
continue;
}
pr_info("%d: Restoring shared page: %16lx\n",
pid, va);
pr_info("%d: Restoring shared page: %16lx\n", pid, va);
write(fd, &va, sizeof(va));
sendfile(fd, shfd, NULL, PAGE_SIZE);
sendfile(fd, pgfd, NULL, PAGE_SIZE);
}
close(shfd);
close(pgfd);
write_img(fd, &zero_page_entry);
return 0;
......
......@@ -455,9 +455,6 @@ static void show_core(int fd_core, bool show_content)
goto out;
show_vma(fd_core);
pr_info("\n\t---[Memory pages]---\n");
show_pages(fd_core, show_content);
out:
pr_img_tail(CR_FD_CORE);
}
......
......@@ -35,7 +35,10 @@ enum {
PARASITE_CMD_SET_LOGFD,
PARASITE_CMD_FINI,
PARASITE_CMD_DUMPPAGES_INIT,
PARASITE_CMD_DUMPPAGES,
PARASITE_CMD_DUMPPAGES_FINI,
PARASITE_CMD_DUMP_SIGACTS,
PARASITE_CMD_DUMP_ITIMERS,
PARASITE_CMD_DUMP_MISC,
......@@ -72,9 +75,12 @@ struct parasite_dump_pages_args {
parasite_status_t status;
struct vma_entry vma_entry;
unsigned long nrpages_dumped; /* how many pages are dumped */
unsigned long fd;
int fd_type;
};
/* Values for parasite_dump_pages_args.fd_type: which pages image to write to */
#define PG_PRIV 0
#define PG_SHARED 1
/*
* Misc stuff, that is too small for a separate file, but cannot
* be read w/o using parasite
......
......@@ -484,6 +484,7 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
struct cr_fdset *cr_fdset)
{
struct parasite_dump_pages_args parasite_dumppages = { };
parasite_status_t *st = &parasite_dumppages.status;
user_regs_struct_t regs, regs_orig;
unsigned long nrpages_dumped = 0;
struct vma_area *vma_area;
......@@ -498,12 +499,18 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
if (ret < 0)
goto out;
/*
* Make sure the data is on disk since we will re-open
* it in another process.
*/
fsync(cr_fdset->fds[CR_FD_PAGES]);
parasite_dumppages.fd = -1UL;
ret = parasite_prep_file(CR_FD_PAGES_SHMEM, ctl, cr_fdset);
if (ret < 0)
goto out;
ret = parasite_execute(PARASITE_CMD_DUMPPAGES_INIT, ctl, st, sizeof(*st));
if (ret < 0) {
pr_panic("Dumping pages failed with %li (%li) at %li\n",
parasite_dumppages.status.ret,
parasite_dumppages.status.sys_ret,
parasite_dumppages.status.line);
goto out;
}
list_for_each_entry(vma_area, vma_area_list, list) {
......@@ -520,6 +527,16 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
pr_info_vma(vma_area);
parasite_dumppages.vma_entry = vma_area->vma;
if (vma_area_is(vma_area, VMA_ANON_PRIVATE) ||
vma_area_is(vma_area, VMA_FILE_PRIVATE))
parasite_dumppages.fd_type = PG_PRIV;
else if (vma_area_is(vma_area, VMA_ANON_SHARED))
parasite_dumppages.fd_type = PG_SHARED;
else {
pr_warning("Unexpected VMA area found\n");
continue;
}
ret = parasite_execute(PARASITE_CMD_DUMPPAGES, ctl,
(parasite_status_t *) &parasite_dumppages,
sizeof(parasite_dumppages));
......@@ -529,48 +546,27 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
parasite_dumppages.status.sys_ret,
parasite_dumppages.status.line);
goto err_restore;
goto out;
}
pr_info(" (dumped: %16li pages)\n", parasite_dumppages.nrpages_dumped);
nrpages_dumped += parasite_dumppages.nrpages_dumped;
}
if (ptrace(PTRACE_GETREGS, (long)ctl->pid, NULL, &regs_orig)) {
pr_err("Can't get registers (pid: %d)\n", ctl->pid);
goto err_restore;
}
/* Finally close the descriptor the parasite has opened */
if (parasite_dumppages.fd != -1UL) {
regs = regs_orig;
regs.ax = __NR_close; /* close */
regs.di = parasite_dumppages.fd; /* @fd */
ret = syscall_seized(ctl->pid, &regs_orig, &regs, &regs);
}
parasite_execute(PARASITE_CMD_DUMPPAGES_FINI, ctl, st, sizeof(*st));
if (ptrace(PTRACE_SETREGS, (long)ctl->pid, NULL, &regs_orig)) {
pr_panic("Can't restore registers (pid: %d)\n", ctl->pid);
goto err_restore;
}
/*
* We don't know the position in file since it's updated
* outside of our process.
*/
lseek(cr_fdset->fds[CR_FD_PAGES], 0, SEEK_END);
/* Ending page */
if (write_img(cr_fdset->fds[CR_FD_PAGES], &zero_page_entry))
goto err_restore;
goto out;
if (write_img(cr_fdset->fds[CR_FD_PAGES_SHMEM], &zero_page_entry))
goto out;
pr_info("\n");
pr_info("Summary: %16li pages dumped\n", nrpages_dumped);
ret = 0;
err_restore:
fchmod(cr_fdset->fds[CR_FD_PAGES], CR_FD_PERM);
out:
fchmod(cr_fdset->fds[CR_FD_PAGES], CR_FD_PERM);
fchmod(cr_fdset->fds[CR_FD_PAGES_SHMEM], CR_FD_PERM);
pr_info("----------------------------------------\n");
return ret;
......
......@@ -94,6 +94,27 @@ static inline int should_dump_page(struct vma_entry *vmae, unsigned char mincore
#endif
}
static int fd_pages[2] = { -1, -1 };
/*
 * Receive the two page-image descriptors over tsock: first the one
 * stored in fd_pages[PG_PRIV], then the one stored in fd_pages[PG_SHARED].
 *
 * Returns 0 on success. On failure sets @st to PARASITE_ERR_FAIL and
 * returns -1; every fd_pages[] slot touched by the failed call is reset
 * to -1, so neither a closed descriptor number nor a raw error code is
 * left behind in the table.
 */
static int dump_pages_init(parasite_status_t *st)
{
	fd_pages[PG_PRIV] = recv_fd(tsock);
	if (fd_pages[PG_PRIV] < 0)
		goto err;

	fd_pages[PG_SHARED] = recv_fd(tsock);
	if (fd_pages[PG_SHARED] < 0)
		goto err_s;

	return 0;

err_s:
	/* The first descriptor was received fine; don't leak it */
	sys_close(fd_pages[PG_PRIV]);
	fd_pages[PG_SHARED] = -1;
err:
	/* Forget the closed descriptor (or the error code) */
	fd_pages[PG_PRIV] = -1;
	SET_PARASITE_STATUS(st, PARASITE_ERR_FAIL, -1);
	return -1;
}
/*
* This is the main page dumping routine, it's executed
* inside a victim process space.
......@@ -105,22 +126,15 @@ static int dump_pages(struct parasite_dump_pages_args *args)
unsigned long prot_old, prot_new;
unsigned char *map_brk = NULL;
unsigned char *map;
int ret = PARASITE_ERR_FAIL;
int ret = PARASITE_ERR_FAIL, fd;
args->nrpages_dumped = 0;
prot_old = prot_new = 0;
if (args->fd == -1UL) {
ret = recv_fd(tsock);
if (ret < 0)
goto err;
args->fd = ret;
}
fd = fd_pages[args->fd_type];
/* Start from the end of file */
sys_lseek(args->fd, 0, SEEK_END);
sys_lseek(fd, 0, SEEK_END);
length = args->vma_entry.end - args->vma_entry.start;
nrpages = length / PAGE_SIZE;
......@@ -188,8 +202,8 @@ static int dump_pages(struct parasite_dump_pages_args *args)
vaddr = (unsigned long)args->vma_entry.start + pfn * PAGE_SIZE;
written = 0;
written += sys_write(args->fd, &vaddr, sizeof(vaddr));
written += sys_write(args->fd, (void *)vaddr, PAGE_SIZE);
written += sys_write(fd, &vaddr, sizeof(vaddr));
written += sys_write(fd, (void *)vaddr, PAGE_SIZE);
if (written != sizeof(vaddr) + PAGE_SIZE) {
SET_PARASITE_STATUS(st, PARASITE_ERR_WRITE, written);
ret = st->ret;
......@@ -227,6 +241,13 @@ err:
return ret;
}
/*
 * Close both page-image descriptors kept in fd_pages[] and mark the
 * slots as unused (-1), so that a later command cannot act on a
 * descriptor number that has already been closed.
 *
 * @st is not written. Always returns 0.
 */
static int dump_pages_fini(parasite_status_t *st)
{
	sys_close(fd_pages[PG_PRIV]);
	fd_pages[PG_PRIV] = -1;

	sys_close(fd_pages[PG_SHARED]);
	fd_pages[PG_SHARED] = -1;

	return 0;
}
static int dump_sigact(parasite_status_t *st)
{
rt_sigaction_t act;
......@@ -392,6 +413,10 @@ static int __used parasite_service(unsigned long cmd, void *args, void *brk)
return fini();
case PARASITE_CMD_SET_LOGFD:
return set_logfd();
case PARASITE_CMD_DUMPPAGES_INIT:
return dump_pages_init((parasite_status_t *) args);
case PARASITE_CMD_DUMPPAGES_FINI:
return dump_pages_fini((parasite_status_t *) args);
case PARASITE_CMD_DUMPPAGES:
return dump_pages((struct parasite_dump_pages_args *)args);
case PARASITE_CMD_DUMP_SIGACTS:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment