Commit e869c16d authored by Andrey Vagin's avatar Andrey Vagin Committed by Pavel Emelyanov

mm: rework of dumping shared memory

vma_entry now contains shmid, and each shared memory segment is dumped into its own file.
The most interesting thing is restore.
A mapping is restored by the process with the smallest pid. The mapping
is created before executing the restorer.
We map the full mapping and restore its content, then we open a file from
/proc/pid/map_files and store a descriptor in vma_info. The mapping is then
unmapped. Now we can map any region of this mapping in the restorer.

We use this trick because a target process may have this mapping in
several places, and the restorer has no function to open proc files.

v2: fix error handling
xemul: Fixed static-s and args for cr_dump_shmem
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 31feef8a
......@@ -415,6 +415,31 @@ static int dump_task_files(pid_t pid, const struct cr_fdset *cr_fdset,
return 0;
}
/*
 * Bookkeeping for one anonymous shared memory segment seen while dumping.
 * One entry is created the first time a given shmid is met in
 * dump_task_mappings(); later mappings of the same shmid only grow ->size.
 */
struct shmem_info
{
unsigned long size; /* largest pgoff + (end - start) seen for this shmid */
unsigned long shmid; /* vma shmid; the key used by shmem_find() */
unsigned long start; /* vma start of the first mapping seen; used in the map_files path */
unsigned long end; /* vma end of the first mapping seen; used in the map_files path */
int pid; /* pid of the task whose mapping created this entry */
};
/* Number of entries currently used in the shmems array. */
static int nr_shmems;
/* Array of known shared memory segments; allocated in cr_dump_tasks(). */
static struct shmem_info *shmems;
/* Size in bytes of the buffer allocated for the shmems array. */
#define SHMEMS_SIZE 4096
static struct shmem_info* shmem_find(unsigned long shmid)
{
int i;
for (i = 0; i < nr_shmems; i++)
if (shmems[i].shmid == shmid)
return &shmems[i];
return NULL;
}
static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list,
const struct cr_fdset *cr_fdset)
{
......@@ -431,21 +456,35 @@ static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list,
if (!vma_entry_is(vma, VMA_AREA_REGULAR))
continue;
if (vma_entry_is(vma, VMA_AREA_SYSVIPC))
continue;
pr_info_vma(vma_area);
if (vma_entry_is(vma, VMA_ANON_SHARED)) {
struct shmem_entry e;
struct shmem_info *si;
unsigned long size = vma->pgoff + (vma->end - vma->start);
si = shmem_find(vma_area->vma.shmid);
if (si) {
if (si->size < size)
si->size = size;
continue;
}
e.start = vma->start;
e.end = vma->end;
e.shmid = vma_area->vma.shmid;
nr_shmems++;
if (nr_shmems * sizeof(*si) == SHMEMS_SIZE) {
pr_err("OOM storing shmems\n");
return -1;
}
pr_info("shmem: s: %16lx e: %16lx shmid: %16lx\n",
e.start, e.end, e.shmid);
si = &shmems[nr_shmems - 1];
si->size = size;
si->pid = pid;
si->start = vma->start;
si->end = vma->end;
si->shmid = vma_area->vma.shmid;
if (write_img(cr_fdset->fds[CR_FD_SHMEM], &e))
goto err;
} else if (vma_entry_is(vma, VMA_FILE_PRIVATE) ||
vma_entry_is(vma, VMA_FILE_SHARED)) {
struct fd_parms p = {
......@@ -1230,6 +1269,8 @@ static int dump_one_zombie(const struct pstree_item *item,
struct cr_fdset *cr_fdset)
{
struct core_entry *core;
int ret;
LIST_HEAD(vma_area_list);
cr_fdset = cr_dump_fdset_open(item->pid, CR_FD_DESC_CORE, cr_fdset);
if (cr_fdset == NULL)
......@@ -1242,7 +1283,10 @@ static int dump_one_zombie(const struct pstree_item *item,
core->tc.task_state = TASK_DEAD;
core->tc.exit_code = pps->exit_code;
return dump_task_core(core, cr_fdset);
if (dump_task_core(core, cr_fdset) < 0)
return -1;
return finalize_core(item->pid, &vma_area_list, cr_fdset);
}
static struct proc_pid_stat pps_buf;
......@@ -1402,6 +1446,78 @@ err_free:
return ret;
}
static int cr_dump_shmem(void)
{
int err, fd;
struct cr_fdset *cr_fdset = NULL;
unsigned char *map = NULL;
void *addr = NULL;
struct shmem_info *si;
unsigned long pfn, nrpages;
for (si = shmems; si < &shmems[nr_shmems]; si++) {
pr_info("Dumping shared memory %lx\n", si->shmid);
nrpages = (si->size + PAGE_SIZE -1) / PAGE_SIZE;
map = xmalloc(nrpages * sizeof(*map));
if (!map)
goto err;
fd = open_proc(si->pid, "map_files/%lx-%lx", si->start, si->end);
if (fd < 0)
goto err;
addr = mmap(NULL, si->size, PROT_READ, MAP_SHARED, fd, 0);
close(fd);
if (addr == MAP_FAILED) {
pr_err("Can't map shmem %lx (%lx-%lx)\n",
si->shmid, si->start, si->end);
goto err;
}
err = mincore(addr, si->size, map);
if (err)
goto err_unmap;
fd = open_image(CR_FD_SHMEM_PAGES, O_WRONLY | O_CREAT, si->shmid);
if (fd < 0)
goto err_unmap;
for (pfn = 0; pfn < nrpages; pfn++) {
u64 offset = pfn * PAGE_SIZE;
if (!(map[pfn] & PAGE_RSS))
continue;
if (write_img_buf(fd, &offset, sizeof(offset)))
break;
if (write_img_buf(fd, addr + offset, PAGE_SIZE))
break;
}
if (pfn != nrpages)
goto err_close;
err = write_img(fd, &zero_page_entry);
if (err < 0)
goto err_close;
close(fd);
munmap(addr, si->size);
xfree(map);
}
return 0;
err_close:
close(fd);
err_unmap:
munmap(addr, si->size);
err:
xfree(map);
return -1;
}
int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
{
LIST_HEAD(pstree_list);
......@@ -1440,6 +1556,11 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
goto err;
close_cr_fdset(&cr_fdset);
nr_shmems = 0;
shmems = xmalloc(SHMEMS_SIZE);
if (!shmems)
goto err;
list_for_each_entry(item, &pstree_list, list) {
cr_fdset = cr_dump_fdset_open(item->pid, CR_FD_DESC_NONE, NULL);
if (!cr_fdset)
......@@ -1463,8 +1584,9 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
if (opts->leader_only)
break;
}
ret = 0;
ret = cr_dump_shmem();
xfree(shmems);
err:
pstree_switch_state(&pstree_list, opts);
free_pstree(&pstree_list);
......
This diff is collapsed.
......@@ -54,25 +54,6 @@
static char local_buf[PAGE_SIZE];
static LIST_HEAD(pstree_list);
static void show_shmem(int fd_shmem)
{
struct shmem_entry e;
pr_img_head(CR_FD_SHMEM);
while (1) {
int ret;
ret = read_img_eof(fd_shmem, &e);
if (ret <= 0)
goto out;
pr_msg("0x%lx-0x%lx id %lu\n", e.start, e.end, e.shmid);
}
out:
pr_img_tail(CR_FD_SHMEM);
}
static void show_files(int fd_files)
{
struct fdinfo_entry e;
......@@ -491,9 +472,6 @@ static int cr_parse_file(struct cr_options *opts)
case CORE_MAGIC:
show_core(fd, opts->show_pages_content);
break;
case SHMEM_MAGIC:
show_shmem(fd);
break;
case PSTREE_MAGIC:
show_pstree(fd, NULL);
break;
......@@ -606,8 +584,6 @@ static int cr_show_all(unsigned long pid, struct cr_options *opts)
show_files(cr_fdset->fds[CR_FD_FDINFO]);
show_shmem(cr_fdset->fds[CR_FD_SHMEM]);
show_sigacts(cr_fdset->fds[CR_FD_SIGACT]);
show_unixsk(cr_fdset->fds[CR_FD_UNIXSK]);
......
......@@ -69,12 +69,6 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
.magic = PSTREE_MAGIC,
},
/* info about which memory areas are shared */
[CR_FD_SHMEM] = {
.fmt = FMT_FNAME_SHMEM,
.magic = SHMEM_MAGIC,
},
/* info about signal handlers */
[CR_FD_SIGACT] = {
.fmt = FMT_FNAME_SIGACTS,
......
......@@ -21,10 +21,8 @@ enum {
CR_FD_FDINFO,
CR_FD_PAGES,
CR_FD_SHMEM_PAGES,
CR_FD_CORE,
CR_FD_PIPES,
CR_FD_SHMEM,
CR_FD_SIGACT,
CR_FD_UNIXSK,
CR_FD_INETSK,
......@@ -46,6 +44,8 @@ enum {
CR_FD_PID_MAX, /* fmt, pid */
CR_FD_SHMEM_PAGES,
CR_FD_MAX
};
......@@ -75,12 +75,11 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
#define FMT_FNAME_FDINFO "fdinfo-%d.img"
#define FMT_FNAME_PAGES "pages-%d.img"
#define FMT_FNAME_SHMEM_PAGES "pages-shmem-%d.img"
#define FMT_FNAME_SHMEM_PAGES "pages-shmem-%ld.img"
#define FMT_FNAME_CORE "core-%d.img"
#define FMT_FNAME_CORE_OUT "core-%d.img.out"
#define FMT_FNAME_PIPES "pipes-%d.img"
#define FMT_FNAME_PSTREE "pstree-%d.img"
#define FMT_FNAME_SHMEM "shmem-%d.img"
#define FMT_FNAME_SIGACTS "sigacts-%d.img"
#define FMT_FNAME_UNIXSK "unixsk-%d.img"
#define FMT_FNAME_INETSK "inetsk-%d.img"
......@@ -119,10 +118,8 @@ struct cr_fdset {
#define CR_FD_DESC_TASK (\
CR_FD_DESC_USE(CR_FD_FDINFO) |\
CR_FD_DESC_USE(CR_FD_PAGES) |\
CR_FD_DESC_USE(CR_FD_SHMEM_PAGES) |\
CR_FD_DESC_USE(CR_FD_CORE) |\
CR_FD_DESC_USE(CR_FD_PIPES) |\
CR_FD_DESC_USE(CR_FD_SHMEM) |\
CR_FD_DESC_USE(CR_FD_SIGACT) |\
CR_FD_DESC_USE(CR_FD_UNIXSK) |\
CR_FD_DESC_USE(CR_FD_INETSK) |\
......
......@@ -13,7 +13,6 @@
#define FDINFO_MAGIC 0x56213732 /* Dmitrov */
#define PAGES_MAGIC 0x56084025 /* Vladimir */
#define CORE_MAGIC 0x55053847 /* Kolomna */
#define SHMEM_MAGIC 0x54123737 /* Tula */
#define PIPES_MAGIC 0x56513555 /* Tver */
#define SIGACT_MAGIC 0x55344201 /* Murom */
#define UNIXSK_MAGIC 0x54373943 /* Ryazan */
......@@ -55,12 +54,6 @@ struct fdinfo_entry {
((fe)->type == FDINFO_CWD) || \
((fe)->type == FDINFO_EXE))
/*
 * One record of the CR_FD_SHMEM image: describes a shared memory area.
 * Written with write_img() and read back with read_img_eof(), so the
 * layout (packed, three u64 fields in this order) is the image format.
 */
struct shmem_entry {
u64 start; /* start address of the area */
u64 end; /* end address of the area */
u64 shmid; /* identifier of the shared memory segment */
} __packed;
struct pstree_entry {
u32 pid;
u32 nr_children;
......
......@@ -182,11 +182,20 @@ struct rt_sigframe {
#define SHMEMS_SIZE 4096
/*
* pid is the pid of the creator
* start, end are used to open the mapping
* fd is a file descriptor, which is valid for the creator;
* it's opened in cr-restore, because pgoff may be non-zero
*/
struct shmem_info {
unsigned long shmid;
unsigned long start;
unsigned long end;
unsigned long shmid;
unsigned long size;
int pid;
int fd;
u32 lock; /* futex */
};
......@@ -209,18 +218,15 @@ struct task_entries {
u32 start; //futex
};
static always_inline struct shmem_info *
find_shmem_by_pid(struct shmems *shmems, unsigned long start, int pid)
find_shmem(struct shmems *shmems, unsigned long shmid)
{
struct shmem_info *si;
int i;
for (i = 0; i < shmems->nr_shmems; i++) {
si = &shmems->entries[i];
if (si->start == start &&
si->end > start &&
si->pid == pid)
if (si->shmid == shmid)
return si;
}
......
......@@ -538,10 +538,6 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
if (ret < 0)
goto out;
ret = parasite_prep_file(CR_FD_SHMEM_PAGES, ctl, cr_fdset);
if (ret < 0)
goto out;
ret = parasite_execute(PARASITE_CMD_DUMPPAGES_INIT, ctl, st, sizeof(*st));
if (ret < 0) {
pr_err("Dumping pages failed with %li at %li\n",
......@@ -566,15 +562,16 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
if (vma_area->vma.status & VMA_AREA_SYSVIPC)
continue;
if (vma_area_is(vma_area, VMA_ANON_SHARED))
continue;
pr_info_vma(vma_area);
parasite_dumppages.vma_entry = vma_area->vma;
if (vma_area_is(vma_area, VMA_ANON_PRIVATE) ||
vma_area_is(vma_area, VMA_FILE_PRIVATE))
vma_area_is(vma_area, VMA_FILE_PRIVATE)) {
parasite_dumppages.fd_type = PG_PRIV;
else if (vma_area_is(vma_area, VMA_ANON_SHARED))
parasite_dumppages.fd_type = PG_SHARED;
else {
} else {
pr_warn("Unexpected VMA area found\n");
continue;
}
......@@ -598,8 +595,6 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
if (write_img(cr_fdset->fds[CR_FD_PAGES], &zero_page_entry))
goto out;
if (write_img(cr_fdset->fds[CR_FD_SHMEM_PAGES], &zero_page_entry))
goto out;
pr_info("\n");
pr_info("Summary: %16li pages dumped\n", nrpages_dumped);
......@@ -607,7 +602,6 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
out:
fchmod(cr_fdset->fds[CR_FD_PAGES], CR_FD_PERM);
fchmod(cr_fdset->fds[CR_FD_SHMEM_PAGES], CR_FD_PERM);
pr_info("----------------------------------------\n");
return ret;
......
......@@ -123,14 +123,8 @@ static int dump_pages_init(parasite_status_t *st)
if (ret < 0)
goto err;
ret = fd_pages[PG_SHARED] = recv_fd(tsock);
if (ret < 0)
goto err_s;
return 0;
err_s:
sys_close(fd_pages[PG_PRIV]);
err:
SET_PARASITE_RET(st, ret);
return -1;
......@@ -246,7 +240,6 @@ err:
/*
 * Finish a page dumping session by closing the page image descriptors
 * kept in fd_pages[]. The results of sys_close() are not checked.
 *
 * Always returns 0.
 */
static int dump_pages_fini(void)
{
sys_close(fd_pages[PG_PRIV]);
/* NOTE(review): this page shows a diff hunk; confirm PG_SHARED is still set up by dump_pages_init() */
sys_close(fd_pages[PG_SHARED]);
return 0;
}
......
......@@ -477,10 +477,10 @@ long restore_task(struct task_restore_core_args *args)
if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
struct shmem_info *entry;
entry = find_shmem_by_pid(args->shmems,
vma_entry->start,
my_pid);
if (entry)
entry = find_shmem(args->shmems,
vma_entry->shmid);
if (entry && entry->pid == my_pid &&
entry->start == vma_entry->start)
cr_wait_set(&entry->lock, 1);
}
......
......@@ -63,7 +63,7 @@ void pr_vma(unsigned int loglevel, const struct vma_area *vma_area)
return;
print_on_level(loglevel, "s: %16lx e: %16lx l: %8liK p: %8x f: %8x pg: %8lx "
"vf: %s st: %s spc: %s\n",
"vf: %s st: %s spc: %-8s shmid: %8lx\n",
vma_area->vma.start, vma_area->vma.end,
KBYTES(vma_area_len(vma_area)),
vma_area->vma.prot,
......@@ -79,7 +79,8 @@ void pr_vma(unsigned int loglevel, const struct vma_area *vma_area)
((vma_area->vma.status & VMA_AREA_STACK) ? "stack" :
((vma_area->vma.status & VMA_AREA_HEAP) ? "heap" :
((vma_area->vma.status & VMA_AREA_VSYSCALL) ? "vsyscall" :
((vma_area->vma.status & VMA_AREA_VDSO) ? "vdso" : "n")))));
((vma_area->vma.status & VMA_AREA_VDSO) ? "vdso" : "n")))),
vma_area->vma.shmid);
}
int close_safe(int *fd)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment