Commit d1db4faf authored by Cyrill Gorcunov, committed by Pavel Emelyanov

vdso: Don't fail if pagemap is not accessible

We use the page frame number to detect a vDSO which has been remapped
in-place from the runtime vDSO during restore. In such a case, if the
kernel is older than 3.16, the "[vdso]" mark won't be reported
in procfs output.

Still, to address recently reported CVEs and to be able to run CRIU
in unprivileged mode, we need to handle the vDSO without pagemap access.
Here is the deal -- when we find a VMA which "looks like" a vDSO,
we scan it for vDSO symbols and, if it matches, we restore
its status without PFN access.

Here are some details on the in-kernel history of @pagemap access:

 - @pagemap was introduced in commit 85863e475e59, where anyone
   who can attach to a task via ptrace is allowed to read
   data from @pagemap (Feb 4 2008, v2.6.25-rc1)

 - in commit 006ebb40d3d65 ptrace attach rule has been changed
   into ptrace read permission (May 19 2008, v2.6.27-rc1)

 - in commit ab676b7d6fbf4 opening of @pagemap became guarded
   with CAP_SYS_ADMIN because of a leak of physical addresses
   into userspace (Mar 9 2015, v4.0-rc5)

 - in commit 1c90308e7a77a opening of @pagemap became available
   for regular users again (with ptrace read permission) but
   physical addresses of pages are hidden from non-privileged
   users (Sep 8 2015, v4.3-rc1)
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Looks-good-to-me: Andrew Vagin <avagin@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent d10835c4
...@@ -93,6 +93,8 @@ struct parasite_vdso_vma_entry { ...@@ -93,6 +93,8 @@ struct parasite_vdso_vma_entry {
unsigned long proxy_vdso_addr; unsigned long proxy_vdso_addr;
unsigned long proxy_vvar_addr; unsigned long proxy_vvar_addr;
int is_marked; int is_marked;
bool try_fill_symtable;
bool is_vdso;
}; };
struct parasite_dump_pages_args { struct parasite_dump_pages_args {
......
...@@ -482,6 +482,15 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args) ...@@ -482,6 +482,15 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args)
args->is_marked = 0; args->is_marked = 0;
args->proxy_vdso_addr = VDSO_BAD_ADDR; args->proxy_vdso_addr = VDSO_BAD_ADDR;
args->proxy_vvar_addr = VVAR_BAD_ADDR; args->proxy_vvar_addr = VVAR_BAD_ADDR;
if (args->try_fill_symtable) {
struct vdso_symtable t;
if (vdso_fill_symtable((void *)args->start, args->len, &t))
args->is_vdso = false;
else
args->is_vdso = true;
}
} }
return 0; return 0;
......
...@@ -43,15 +43,20 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, ...@@ -43,15 +43,20 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
struct vma_area *proxy_vdso_marked = NULL; struct vma_area *proxy_vdso_marked = NULL;
struct vma_area *proxy_vvar_marked = NULL; struct vma_area *proxy_vvar_marked = NULL;
struct parasite_vdso_vma_entry *args; struct parasite_vdso_vma_entry *args;
struct vma_area *vma;
int fd, ret, exit_code = -1; int fd, ret, exit_code = -1;
u64 pfn = VDSO_BAD_PFN;
struct vma_area *vma;
off_t off; off_t off;
u64 pfn;
args = parasite_args(ctl, struct parasite_vdso_vma_entry); args = parasite_args(ctl, struct parasite_vdso_vma_entry);
fd = open_proc(pid, "pagemap"); fd = __open_proc(pid, EPERM, O_RDONLY, "pagemap");
if (fd < 0) if (fd < 0) {
if (errno == EPERM) {
pr_info("Pagemap is unavailable, trying a slow way\n");
} else
return -1; return -1;
} else
BUG_ON(vdso_pfn == VDSO_BAD_PFN);
list_for_each_entry(vma, &vma_area_list->h, list) { list_for_each_entry(vma, &vma_area_list->h, list) {
if (!vma_area_is(vma, VMA_AREA_REGULAR)) if (!vma_area_is(vma, VMA_AREA_REGULAR))
...@@ -91,12 +96,18 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, ...@@ -91,12 +96,18 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
* I need to poke every potentially marked vma, * I need to poke every potentially marked vma,
* otherwise if task never called for vdso functions * otherwise if task never called for vdso functions
* page frame number won't be reported. * page frame number won't be reported.
*
* Moreover, if page frame numbers are not accessible
* we have to scan the vma zone for vDSO elf structure
* which gonna be a slow way.
*/ */
args->start = vma->e->start; args->start = vma->e->start;
args->len = vma_area_len(vma); args->len = vma_area_len(vma);
args->try_fill_symtable = (fd < 0) ? true : false;
args->is_vdso = false;
if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) { if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
pr_err("vdso: Parasite failed to poke for mark\n"); pr_err("Parasite failed to poke for mark\n");
goto err; goto err;
} }
...@@ -116,6 +127,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, ...@@ -116,6 +127,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
continue; continue;
} }
/*
* If we have an access to pagemap we can handle vDSO
* status early. Otherwise, in worst scenario, where
* the dumpee has been remapping vdso on its own and
* the kernel version is < 3.16, the vdso won't be
* detected via procfs status so we have to parse
* symbols in parasite code.
*/
if (fd >= 0) {
off = (vma->e->start / PAGE_SIZE) * sizeof(u64); off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
ret = pread(fd, &pfn, sizeof(pfn), off); ret = pread(fd, &pfn, sizeof(pfn), off);
if (ret < 0 || ret != sizeof(pfn)) { if (ret < 0 || ret != sizeof(pfn)) {
...@@ -128,6 +148,7 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, ...@@ -128,6 +148,7 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
pr_err("Unexpected page fram number 0 for pid %d\n", pid); pr_err("Unexpected page fram number 0 for pid %d\n", pid);
goto err; goto err;
} }
}
/* /*
* Setup proper VMA status. Note starting with 3.16 * Setup proper VMA status. Note starting with 3.16
...@@ -136,15 +157,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, ...@@ -136,15 +157,15 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
* but only since that particular version of the * but only since that particular version of the
* kernel! * kernel!
*/ */
if (pfn == vdso_pfn) { if ((pfn == vdso_pfn && pfn != VDSO_BAD_PFN) || args->is_vdso) {
if (!vma_area_is(vma, VMA_AREA_VDSO)) { if (!vma_area_is(vma, VMA_AREA_VDSO)) {
pr_debug("vdso: Restore vDSO status by pfn at %lx\n", pr_debug("Restore vDSO status by pfn/symtable at %lx\n",
(long)vma->e->start); (long)vma->e->start);
vma->e->status |= VMA_AREA_VDSO; vma->e->status |= VMA_AREA_VDSO;
} }
} else { } else {
if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) { if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
pr_debug("vdso: Drop mishinted vDSO status at %lx\n", pr_debug("Drop mishinted vDSO status at %lx\n",
(long)vma->e->start); (long)vma->e->start);
vma->e->status &= ~VMA_AREA_VDSO; vma->e->status &= ~VMA_AREA_VDSO;
} }
...@@ -290,5 +311,8 @@ int vdso_init(void) ...@@ -290,5 +311,8 @@ int vdso_init(void)
{ {
if (vdso_fill_self_symtable(&vdso_sym_rt)) if (vdso_fill_self_symtable(&vdso_sym_rt))
return -1; return -1;
return vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn); if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn) != 0)
pr_info("VDSO detection turned off\n");
return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment