Commit d7684252, authored and committed by Pavel Emelyanov

kdat: Handle pagemaps with zeroed pfns

Recent kernels allow a user to read the proc pagemap file, but zero
the pfns in it. Support this mode for user dumps.

https://github.com/xemul/criu/issues/101

Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
Acked-by: Andrew Vagin <avagin@virtuozzo.com>
parent 40184b97
...@@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void); ...@@ -16,6 +16,13 @@ extern int kerndat_get_dirty_track(void);
extern int kerndat_fdinfo_has_lock(void); extern int kerndat_fdinfo_has_lock(void);
extern int kerndat_loginuid(bool only_dump); extern int kerndat_loginuid(bool only_dump);
/*
 * How much of the proc pagemap file can be used by the current run.
 * The zero value doubles as the "not probed yet" state of a
 * zero-initialized kerndat_s.
 */
enum pagemap_func {
	/* Nothing has been detected yet */
	PM_UNKNOWN = 0,
	/* User mode: the pagemap file cannot be opened */
	PM_DISABLED = 1,
	/* User mode: the file is readable, but the pfn part reads as zero */
	PM_FLAGS_ONLY = 2,
	/* Both the flags and the pfn part are reported */
	PM_FULL = 3,
};
struct kerndat_s { struct kerndat_s {
dev_t shmem_dev; dev_t shmem_dev;
int tcp_max_rshare; int tcp_max_rshare;
...@@ -27,6 +34,7 @@ struct kerndat_s { ...@@ -27,6 +34,7 @@ struct kerndat_s {
unsigned long task_size; unsigned long task_size;
bool ipv6; bool ipv6;
bool has_loginuid; bool has_loginuid;
enum pagemap_func pmap;
}; };
extern struct kerndat_s kdat; extern struct kerndat_s kdat;
......
...@@ -41,6 +41,42 @@ struct kerndat_s kdat = { ...@@ -41,6 +41,42 @@ struct kerndat_s kdat = {
.tcp_max_rshare = 87380, .tcp_max_rshare = 87380,
}; };
/*
 * Probe the pagemap file once and record in kdat.pmap how usable it is:
 *
 *  - PM_DISABLED   -- opening it failed with EPERM;
 *  - PM_FLAGS_ONLY -- it is readable, but the pfn bits of the entry
 *                     for a page known to be present come back as zero;
 *  - PM_FULL       -- the pfn bits are reported.
 *
 * Returns 0 when the mode has been determined and -1 on any other
 * open or read failure (kdat.pmap is left untouched in that case).
 */
static int check_pagemap(void)
{
	int ret, fd;
	u64 pfn = 0;

	fd = __open_proc(PROC_SELF, EPERM, O_RDONLY, "pagemap");
	if (fd < 0) {
		if (errno == EPERM) {
			/* Not an error: callers fall back to pagemap-less modes */
			pr_info("Pagemap disabled\n");
			kdat.pmap = PM_DISABLED;
			return 0;
		}

		return -1;
	}

	/*
	 * Get the PFN of some present page. Stack is here, so try it :)
	 * The entry index is the page number of a local variable, and
	 * each pagemap entry is sizeof(pfn) bytes wide.
	 */
	ret = pread(fd, &pfn, sizeof(pfn), (((unsigned long)&ret) / page_size()) * sizeof(pfn));
	if (ret != sizeof(pfn)) {
		/* Report first so that errno still belongs to pread() */
		pr_perror("Can't read pagemap");
		/* Don't leak the descriptor on the failure path */
		close(fd);
		return -1;
	}

	close(fd);

	if ((pfn & PME_PFRAME_MASK) == 0) {
		pr_info("Pagemap provides flags only\n");
		kdat.pmap = PM_FLAGS_ONLY;
	} else {
		pr_info("Pagemap is fully functional\n");
		kdat.pmap = PM_FULL;
	}

	return 0;
}
/* /*
* Anonymous shared mappings are backed by hidden tmpfs * Anonymous shared mappings are backed by hidden tmpfs
* mount. Find out its dev to distinguish such mappings * mount. Find out its dev to distinguish such mappings
...@@ -322,13 +358,15 @@ static int init_zero_page_pfn() ...@@ -322,13 +358,15 @@ static int init_zero_page_pfn()
return -1; return -1;
} }
if (kdat.pmap != PM_FULL) {
pr_info("Zero page detection failed, optimization turns off.\n");
return 0;
}
ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn); ret = vaddr_to_pfn((unsigned long)addr, &kdat.zero_page_pfn);
munmap(addr, PAGE_SIZE); munmap(addr, PAGE_SIZE);
if (ret == 1) { if (kdat.zero_page_pfn == 0)
pr_info("Zero page detection failed, optimization turns off.\n");
ret = 0;
} else if (kdat.zero_page_pfn == 0)
ret = -1; ret = -1;
return ret; return ret;
...@@ -456,6 +494,8 @@ int kerndat_init(void) ...@@ -456,6 +494,8 @@ int kerndat_init(void)
{ {
int ret; int ret;
ret = check_pagemap();
if (!ret)
ret = kerndat_get_shmemdev(); ret = kerndat_get_shmemdev();
if (!ret) if (!ret)
ret = kerndat_get_dirty_track(); ret = kerndat_get_dirty_track();
...@@ -487,6 +527,8 @@ int kerndat_init_rst(void) ...@@ -487,6 +527,8 @@ int kerndat_init_rst(void)
* not available inside namespaces. * not available inside namespaces.
*/ */
ret = check_pagemap();
if (!ret)
ret = tcp_read_sysctl_limits(); ret = tcp_read_sysctl_limits();
if (!ret) if (!ret)
ret = get_last_cap(); ret = get_last_cap();
......
...@@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz ...@@ -46,13 +46,14 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz
if (!pmc->map) if (!pmc->map)
goto err; goto err;
pmc->fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap"); if (kdat.pmap == PM_DISABLED) {
if (pmc->fd < 0) { pmc->fd = -1;
if (errno != EPERM)
goto err;
pr_warn("No pagemap for %d available, " pr_warn("No pagemap for %d available, "
"switching to greedy mode\n", pid); "switching to greedy mode\n", pid);
} else {
pmc->fd = open_proc(pid, "pagemap");
if (pmc->fd < 0)
goto err;
} }
pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len); pr_debug("created for pid %d (takes %zu bytes)\n", pid, pmc->map_len);
......
...@@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn) ...@@ -763,9 +763,9 @@ int vaddr_to_pfn(unsigned long vaddr, u64 *pfn)
int fd, ret = -1; int fd, ret = -1;
off_t off; off_t off;
fd = __open_proc(getpid(), EPERM, O_RDONLY, "pagemap"); fd = open_proc(getpid(), "pagemap");
if (fd < 0) if (fd < 0)
return errno == EPERM ? 1 : -1; return -1;
off = (vaddr / page_size()) * sizeof(u64); off = (vaddr / page_size()) * sizeof(u64);
ret = pread(fd, pfn, sizeof(*pfn), off); ret = pread(fd, pfn, sizeof(*pfn), off);
......
...@@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, ...@@ -43,20 +43,19 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
struct vma_area *proxy_vdso_marked = NULL; struct vma_area *proxy_vdso_marked = NULL;
struct vma_area *proxy_vvar_marked = NULL; struct vma_area *proxy_vvar_marked = NULL;
struct parasite_vdso_vma_entry *args; struct parasite_vdso_vma_entry *args;
int fd, ret, exit_code = -1; int fd = -1, ret, exit_code = -1;
u64 pfn = VDSO_BAD_PFN; u64 pfn = VDSO_BAD_PFN;
struct vma_area *vma; struct vma_area *vma;
off_t off; off_t off;
args = parasite_args(ctl, struct parasite_vdso_vma_entry); args = parasite_args(ctl, struct parasite_vdso_vma_entry);
fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap"); if (kdat.pmap == PM_FULL) {
if (fd < 0) { BUG_ON(vdso_pfn == VDSO_BAD_PFN);
if (errno == EPERM) { fd = open_proc(pid, "pagemap");
pr_info("Pagemap is unavailable, trying a slow way\n"); if (fd < 0)
} else
return -1; return -1;
} else } else
BUG_ON(vdso_pfn == VDSO_BAD_PFN); pr_info("Pagemap is unavailable, trying a slow way\n");
list_for_each_entry(vma, &vma_area_list->h, list) { list_for_each_entry(vma, &vma_area_list->h, list) {
if (!vma_area_is(vma, VMA_AREA_REGULAR)) if (!vma_area_is(vma, VMA_AREA_REGULAR))
...@@ -311,8 +310,11 @@ int vdso_init(void) ...@@ -311,8 +310,11 @@ int vdso_init(void)
{ {
if (vdso_fill_self_symtable(&vdso_sym_rt)) if (vdso_fill_self_symtable(&vdso_sym_rt))
return -1; return -1;
if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
if (kdat.pmap != PM_FULL)
pr_info("VDSO detection turned off\n"); pr_info("VDSO detection turned off\n");
else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
return -1;
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment