Commit 18fe3575 authored by Cyrill Gorcunov's avatar Cyrill Gorcunov Committed by Pavel Emelyanov

vdso: Implement vDSO proxification of any vvar/vdso order

In latest linux-next the vdso zone is placed _after_ vvar
zone so eventually we need to handle any combination of
the following cases

 - no vvar zone
 - vvar before vdso
 - vvar after vdso

Here we address all of them.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 6446fd2c
...@@ -149,8 +149,8 @@ extern int vdso_init(void); ...@@ -149,8 +149,8 @@ extern int vdso_init(void);
extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size); extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t); extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt, extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
VmaEntry *vdso_vma, VmaEntry *vvar_vma, unsigned long vdso_rt_parked_at, size_t index,
unsigned long vdso_rt_parked_at); VmaEntry *vmas, size_t nr_vmas);
extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from); extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
......
...@@ -291,13 +291,33 @@ int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned l ...@@ -291,13 +291,33 @@ int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned l
} }
int vdso_proxify(char *who, struct vdso_symtable *sym_rt, int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
VmaEntry *vdso_vma, VmaEntry *vvar_vma, unsigned long vdso_rt_parked_at, size_t index,
unsigned long vdso_rt_parked_at) VmaEntry *vmas, size_t nr_vmas)
{ {
VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
struct vdso_symtable s = VDSO_SYMTABLE_INIT; struct vdso_symtable s = VDSO_SYMTABLE_INIT;
size_t size = vma_entry_len(vdso_vma);
bool remap_rt = false; bool remap_rt = false;
/*
 * Figure out which kind of vdso tuple we get.
*/
if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
vma_vdso = &vmas[index];
else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
vma_vvar = &vmas[index];
if (index < (nr_vmas - 1)) {
if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
vma_vdso = &vmas[index + 1];
else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
vma_vvar = &vmas[index + 1];
}
if (!vma_vdso) {
pr_err("Can't find vDSO area in image\n");
return -1;
}
/* /*
* vDSO mark overwrites Elf program header of proxy vDSO thus * vDSO mark overwrites Elf program header of proxy vDSO thus
* it must never ever be greater in size. * it must never ever be greater in size.
...@@ -305,35 +325,35 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt, ...@@ -305,35 +325,35 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr)); BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
/* /*
* Find symbols in dumpee vdso. * Find symbols in vDSO zone read from image.
*/ */
if (vdso_fill_symtable((void *)vdso_vma->start, size, &s)) if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
return -1; return -1;
/* /*
* Try to figure out if the vDSO in image has the same symbols * Proxification strategy
* as run time vDSO, if yes we might try to reuse runtime vDSO *
* instead of one in image. * - There might be two vDSO zones: vdso code and optionally vvar data
* - To be able to use in-place remapping we need
* *
* In case if VVAR area is present at least it must have same * a) Size and order of vDSO zones are to match
* size as dumped one for inplace remap, also the order of zones * b) Symbols offsets must match
* must be matching. * c) Have same number of vDSO zones
*/ */
if (size == vdso_vma_size(sym_rt)) { if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
size_t i; size_t i;
for (i = 0; i < ARRAY_SIZE(s.symbols); i++) { for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
if (s.symbols[i].offset != sym_rt->symbols[i].offset) if (s.symbols[i].offset != sym_rt->symbols[i].offset)
break; break;
} }
if (i == ARRAY_SIZE(s.symbols)) {
remap_rt = true;
if (vvar_vma && sym_rt->vvar_start != VVAR_BAD_ADDR) { if (i == ARRAY_SIZE(s.symbols)) {
remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vvar_vma)); if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
if (remap_rt) { if (remap_rt) {
long delta_rt = sym_rt->vvar_start - sym_rt->vma_start; long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
long delta_this = vvar_vma->start - vdso_vma->start; long delta_this = vma_vvar->start - vma_vdso->start;
remap_rt = (delta_rt ^ delta_this) < 0 ? false : true; remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
} }
...@@ -341,39 +361,49 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt, ...@@ -341,39 +361,49 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
} }
} }
pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
vma_vdso->start, vma_vdso->end,
vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
/* /*
* Easy case -- the vdso from image has same offsets and size * Easy case -- the vdso from image has same offsets, order and size
* as runtime, so we simply remap runtime vdso to dumpee position * as runtime, so we simply remap runtime vdso to dumpee position
* without generating any proxy. Note we may remap VVAR vdso as * without generating any proxy.
* well which might not yet been mapped by a caller code. So *
* drop VMA_AREA_REGULAR from it and caller would not touch it * Note we may remap VVAR vdso as well which might not yet been mapped
* anymore. * by a caller code. So drop VMA_AREA_REGULAR from it and caller would
* not touch it anymore.
*/ */
if (remap_rt) { if (remap_rt) {
unsigned long vvar_rt_parked_at = VVAR_BAD_ADDR;
int ret = 0; int ret = 0;
pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n"); pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
if (sys_munmap((void *)vdso_vma->start, vma_entry_len(vdso_vma))) { if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
pr_err("Failed to unmap %s\n", who); pr_err("Failed to unmap %s\n", who);
return -1; return -1;
} }
if (vvar_vma) { if (vma_vvar) {
if (sys_munmap((void *)vvar_vma->start, vma_entry_len(vvar_vma))) { if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
pr_err("Failed to unmap %s\n", who); pr_err("Failed to unmap %s\n", who);
return -1; return -1;
} }
vvar_rt_parked_at = ALIGN(vvar_vma_size(sym_rt), PAGE_SIZE);
vvar_rt_parked_at+= vdso_rt_parked_at;
ret = vdso_remap(who, vvar_rt_parked_at, vvar_vma->start, vma_entry_len(vvar_vma));
vvar_vma->status &= ~VMA_AREA_REGULAR;
} }
ret |= vdso_remap(who, vdso_rt_parked_at, vdso_vma->start, vma_entry_len(vdso_vma)); if (vma_vvar) {
if (vma_vdso->start < vma_vvar->start) {
ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
vdso_rt_parked_at += vdso_vma_size(sym_rt);
ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
} else {
ret = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
vdso_rt_parked_at += vvar_vma_size(sym_rt);
ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
}
} else
ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
return ret; return ret;
} }
...@@ -385,8 +415,15 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt, ...@@ -385,8 +415,15 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
*/ */
pr_info("Runtime vdso mismatches dumpee, generate proxy\n"); pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
/*
* Don't forget to shift if vvar is before vdso.
*/
if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
sym_rt->vvar_start < sym_rt->vma_start)
vdso_rt_parked_at += vvar_vma_size(sym_rt);
if (vdso_redirect_calls((void *)vdso_rt_parked_at, if (vdso_redirect_calls((void *)vdso_rt_parked_at,
(void *)vdso_vma->start, (void *)vma_vdso->start,
sym_rt, &s)) { sym_rt, &s)) {
pr_err("Failed to proxify dumpee contents\n"); pr_err("Failed to proxify dumpee contents\n");
return -1; return -1;
...@@ -398,7 +435,7 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt, ...@@ -398,7 +435,7 @@ int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
* it's auto-generated every new session if proxy required. * it's auto-generated every new session if proxy required.
*/ */
sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE); sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE);
vdso_put_mark((void *)vdso_rt_parked_at, vdso_vma->start, vvar_vma ? vvar_vma->start : VVAR_BAD_ADDR); vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT); sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT);
return 0; return 0;
} }
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
#define vdso_vma_size(t) (0) #define vdso_vma_size(t) (0)
#define vdso_do_park(sym_rt, park_at, park_size) (0) #define vdso_do_park(sym_rt, park_at, park_size) (0)
#define vdso_remap(who, from, to, size) (0) #define vdso_remap(who, from, to, size) (0)
#define vdso_proxify(who, sym_rt, vdso_vma, \ #define vdso_proxify(who, sym_rt, vdso_rt_parked_at, \
vvar_vma, vdso_rt_parked_at) (0) index, vmas, nr_vmas) (0)
#endif /* CONFIG_VDSO */ #endif /* CONFIG_VDSO */
......
...@@ -729,18 +729,11 @@ long __export_restore_task(struct task_restore_args *args) ...@@ -729,18 +729,11 @@ long __export_restore_task(struct task_restore_args *args)
vma_entry->start, vma_entry_len(vma_entry))) vma_entry->start, vma_entry_len(vma_entry)))
goto core_restore_end; goto core_restore_end;
#ifdef CONFIG_VDSO #ifdef CONFIG_VDSO
if (vma_entry_is(vma_entry, VMA_AREA_VDSO)) { if (vma_entry_is(vma_entry, VMA_AREA_VDSO) ||
VmaEntry *vma_vvar; vma_entry_is(vma_entry, VMA_AREA_VVAR)) {
if (i + 1 < args->nr_vmas) {
vma_vvar = args->tgt_vmas + i + 1;
if (!vma_entry_is(vma_entry, VMA_AREA_VVAR))
vma_vvar = NULL;
} else
vma_vvar = NULL;
if (vdso_proxify("left dumpee", &args->vdso_sym_rt, if (vdso_proxify("left dumpee", &args->vdso_sym_rt,
vma_entry, vma_vvar, args->vdso_rt_parked_at,
args->vdso_rt_parked_at)) i, args->tgt_vmas, args->nr_vmas))
goto core_restore_end; goto core_restore_end;
} }
#endif #endif
...@@ -766,18 +759,11 @@ long __export_restore_task(struct task_restore_args *args) ...@@ -766,18 +759,11 @@ long __export_restore_task(struct task_restore_args *args)
vma_entry->start, vma_entry_len(vma_entry))) vma_entry->start, vma_entry_len(vma_entry)))
goto core_restore_end; goto core_restore_end;
#ifdef CONFIG_VDSO #ifdef CONFIG_VDSO
if (vma_entry_is(vma_entry, VMA_AREA_VDSO)) { if (vma_entry_is(vma_entry, VMA_AREA_VDSO) ||
VmaEntry *vma_vvar; vma_entry_is(vma_entry, VMA_AREA_VVAR)) {
if (i + 1 < args->nr_vmas) {
vma_vvar = args->tgt_vmas + i + 1;
if (!vma_entry_is(vma_entry, VMA_AREA_VVAR))
vma_vvar = NULL;
} else
vma_vvar = NULL;
if (vdso_proxify("right dumpee", &args->vdso_sym_rt, if (vdso_proxify("right dumpee", &args->vdso_sym_rt,
vma_entry, vma_vvar, args->vdso_rt_parked_at,
args->vdso_rt_parked_at)) i, args->tgt_vmas, args->nr_vmas))
goto core_restore_end; goto core_restore_end;
} }
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment