Commit 52242be3 authored by Pavel Emelyanov

dump: Stop using mincore and switch to /proc/pid/pagemap

We use mincore to check which pages we should take with us into
the image. Anonymous pages that are present or swapped should go; file
pages that are present but not yet COW-ed should not.

The mincore syscall wasn't very helpful with this -- it didn't detect
swap, but did detect some non present pages (pagecache). Plus it
didn't know anything about cow-ing filemaps.

Andrey Morton suggested using the pagemap file in proc, but it lacked
the important piece -- the cow filemap bit. Now it's there and we
can switch to using it.

The mincore usage for shared memory is still there, as for _that_ case
it's correct.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent cee5e817
......@@ -1819,6 +1819,13 @@ static int cr_dump_shmem(void)
goto err;
}
/*
* We can't use pagemap here, because this vma is
* not mapped to us at all, but mincore reports the
* pagecache status of a file, which is correct in
* this case.
*/
err = mincore(addr, si->size, map);
if (err)
goto err_unmap;
......
......@@ -101,16 +101,18 @@ static void sys_write_msg(const char *msg)
sys_write(logfd, msg, size);
}
static inline int should_dump_page(struct vma_entry *vmae, unsigned char mincore_flags)
/*
* Flag bits tested in a 64-bit pagemap entry (one entry per page,
* read from /proc/self/pagemap by dump_pages):
*   PME_PRESENT - bit 63, the page is present
*   PME_SWAP    - bit 62, the page is swapped
*   PME_FILE    - bit 61, the page is still file-backed; per the commit
*                 message this is the "cow filemap bit", i.e. a private
*                 file mapping page that has not been COW-ed
*/
#define PME_PRESENT (1ULL << 63)
#define PME_SWAP (1ULL << 62)
#define PME_FILE (1ULL << 61)
/*
* Decide whether the page described by pagemap entry @pme, which belongs
* to the VMA @vmae, has to be written into the image.
*
* Returns non-zero when the entry has PME_PRESENT or PME_SWAP set, except
* when @vmae is a VMA_FILE_PRIVATE mapping and the entry has PME_FILE set;
* returns 0 otherwise.
*/
static inline int should_dump_page(struct vma_entry *vmae, u64 pme)
{
#ifdef PAGE_ANON
if (vma_entry_is(vmae, VMA_FILE_PRIVATE))
return mincore_flags & PAGE_ANON;
else
return mincore_flags & PAGE_RSS;
#else
return (mincore_flags & PAGE_RSS);
#endif
return (pme & (PME_PRESENT | PME_SWAP)) &&
/*
* Optimisation for private file mapping pages that haven't
* been COW-ed yet: skip them rather than dump them.
*/
!(vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE));
}
static int fd_pages = -1;
......@@ -137,29 +139,52 @@ static int dump_pages(struct parasite_dump_pages_args *args)
parasite_status_t *st = &args->status;
unsigned long nrpages, pfn, length;
unsigned long prot_old, prot_new;
unsigned char *map;
u64 *map, off;
int ret = -1, fd;
args->nrpages_dumped = 0;
args->nrpages_skipped = 0;
prot_old = prot_new = 0;
fd = fd_pages;
length = args->vma_entry.end - args->vma_entry.start;
nrpages = length / PAGE_SIZE;
pfn = args->vma_entry.start / PAGE_SIZE;
nrpages = (args->vma_entry.end - args->vma_entry.start) / PAGE_SIZE;
args->nrpages_total = nrpages;
length = nrpages * sizeof(*map);
/*
* brk should allow us to handle up to 128M of memory,
* otherwise call for mmap.
* Up to 10M of pagemap will handle 5G mapping.
*/
map = brk_alloc(nrpages);
map = brk_alloc(length);
if (!map) {
SET_PARASITE_RET(st, -ENOMEM);
goto err;
}
fd = sys_open("/proc/self/pagemap", O_RDONLY, 0);
if (fd < 0) {
sys_write_msg("Can't open self pagemap");
SET_PARASITE_RET(st, fd);
goto err_free;
}
off = pfn * sizeof(*map);
off = sys_lseek(fd, off, SEEK_SET);
if (off != pfn * sizeof(*map)) {
sys_write_msg("Can't seek pagemap");
SET_PARASITE_RET(st, off);
goto err_close;
}
ret = sys_read(fd, map, length);
if (ret != length) {
sys_write_msg("Can't read self pagemap");
SET_PARASITE_RET(st, ret);
goto err_free;
}
sys_close(fd);
fd = fd_pages;
/*
* Try to change page protection if needed so we would
* be able to dump contents.
......@@ -177,18 +202,6 @@ static int dump_pages(struct parasite_dump_pages_args *args)
}
}
/*
* Dumping the whole VMA range is not a common operation
* so stick for mincore as a basis.
*/
ret = sys_mincore((void *)args->vma_entry.start, length, map);
if (ret) {
sys_write_msg("sys_mincore failed\n");
SET_PARASITE_RET(st, ret);
goto err_free;
}
ret = 0;
for (pfn = 0; pfn < nrpages; pfn++) {
unsigned long vaddr, written;
......@@ -210,7 +223,7 @@ static int dump_pages(struct parasite_dump_pages_args *args)
}
args->nrpages_dumped++;
} else if (map[pfn] & PAGE_RSS)
} else if (map[pfn] & PME_PRESENT)
args->nrpages_skipped++;
}
......@@ -230,9 +243,13 @@ static int dump_pages(struct parasite_dump_pages_args *args)
ret = 0;
err_free:
brk_free(nrpages);
brk_free(length);
err:
return ret;
err_close:
sys_close(fd);
goto err_free;
}
static int dump_pages_fini(void)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment