Commit 99e0a0cc authored by Christopher Covington, committed by Pavel Emelyanov

Copy x86 VDSO code to AArch64

While it duplicates hundreds of lines of code, this is the
short term strategy Cyrill and I have agreed to for supporting
VDSOs across multiple architectures [1]. With better
understanding of where things differ per-architecture, or even
improved consolidation in the kernel, we can hopefully move to
a more shared implementation in the future.

1. http://lists.openvz.org/pipermail/criu/2014-August/015218.html

Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 0628118c
#ifndef __CR_ASM_VDSO_H__
#define __CR_ASM_VDSO_H__
#include <sys/types.h>
#include "asm/int.h"
#include "protobuf/vma.pb-c.h"
struct parasite_ctl;
struct vm_area_list;
/* Memory protection expected on the vDSO and VVAR areas. */
#define VDSO_PROT		(PROT_READ | PROT_EXEC)
#define VVAR_PROT		(PROT_READ)

/* Sentinel values: address/pfn not discovered yet. */
#define VDSO_BAD_ADDR		(-1ul)
#define VVAR_BAD_ADDR		VDSO_BAD_ADDR
#define VDSO_BAD_PFN		(-1ull)
#define VVAR_BAD_PFN		VDSO_BAD_PFN

/* One vDSO symbol: its name and its offset from the area start. */
struct vdso_symbol {
	char			name[32];
	unsigned long		offset;
};

#define VDSO_SYMBOL_INIT	{ .offset = VDSO_BAD_ADDR, }

/* Returns true when the slot was never filled (symbol absent). */
static inline bool vdso_symbol_empty(struct vdso_symbol *s)
{
	if (s->offset != VDSO_BAD_ADDR)
		return false;
	return s->name[0] == '\0';
}

/*
 * The minimal set of symbols we must support at the moment.
 */
enum {
	VDSO_SYMBOL_CLOCK_GETTIME,
	VDSO_SYMBOL_GETCPU,
	VDSO_SYMBOL_GETTIMEOFDAY,
	VDSO_SYMBOL_TIME,

	VDSO_SYMBOL_MAX
};

/* Bounds of the vdso/vvar areas plus the resolved symbols. */
struct vdso_symtable {
	unsigned long		vma_start;
	unsigned long		vma_end;
	unsigned long		vvar_start;
	unsigned long		vvar_end;
	struct vdso_symbol	symbols[VDSO_SYMBOL_MAX];
};

#define VDSO_SYMTABLE_INIT						\
	{								\
		.vma_start	= VDSO_BAD_ADDR,			\
		.vma_end	= VDSO_BAD_ADDR,			\
		.vvar_start	= VVAR_BAD_ADDR,			\
		.vvar_end	= VVAR_BAD_ADDR,			\
		.symbols	= {					\
			[0 ... VDSO_SYMBOL_MAX - 1] =			\
				(struct vdso_symbol)VDSO_SYMBOL_INIT,	\
		},							\
	}

/* Sizes of the vmas associated with the vdso/vvar areas. */
static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
{
	unsigned long nr_bytes = t->vma_end;

	nr_bytes -= t->vma_start;
	return nr_bytes;
}

static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
{
	unsigned long nr_bytes = t->vvar_end;

	nr_bytes -= t->vvar_start;
	return nr_bytes;
}
/*
 * Special mark which allows us to identify the runtime vdso where
 * calls from the proxy vdso are redirected. This mark is usually
 * placed at the start of the vdso area where the Elf header lives.
 * Since such a runtime vdso is solely used by the proxy and
 * nobody else is supposed to access it, it's more or less
 * safe to overwrite the Elf header with @signature and
 * @proxy_vdso_addr.
 *
 * The @proxy_vdso_addr deserves a few comments. When we redirect
 * the calls from the proxy to the runtime vdso, on the next checkpoint
 * it won't be possible to find which VMA is the proxy, thus
 * we save its address in this member.
 */
struct vdso_mark {
	u64 signature;
	unsigned long proxy_vdso_addr;
	unsigned long version;
	/*
	 * In case of the new vDSO format the VVAR area address is
	 * needed for easier discovery of where it lives, without
	 * relying on procfs output.
	 */
	unsigned long proxy_vvar_addr;
};
#define VDSO_MARK_SIGNATURE (0x6f73647675697263ULL) /* Magic number (criuvdso) */
#define VDSO_MARK_SIGNATURE_V2 (0x4f53447675697263ULL) /* Magic number (criuvDSO) */
#define VDSO_MARK_CUR_VERSION (2)
/* Stamp a current-version (v2) vdso mark over the Elf header at @where. */
static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
{
	struct vdso_mark *mark = where;

	mark->version		= VDSO_MARK_CUR_VERSION;
	mark->signature		= VDSO_MARK_SIGNATURE_V2;
	mark->proxy_vdso_addr	= proxy_vdso_addr;
	mark->proxy_vvar_addr	= proxy_vvar_addr;
}
/*
 * Test whether @addr carries a vdso mark. Old-format marks are
 * upgraded in place to the version we currently support.
 */
static inline bool is_vdso_mark(void *addr)
{
	struct vdso_mark *mark = addr;

	if (mark->signature == VDSO_MARK_SIGNATURE) {
		/*
		 * Old format -- simply extend the mark up
		 * to the version we support.
		 */
		vdso_put_mark(mark, mark->proxy_vdso_addr, VVAR_BAD_ADDR);
		return true;
	}

	/* New format needs no fixups. */
	return mark->signature == VDSO_MARK_SIGNATURE_V2;
}
#define VDSO_SYMBOL_CLOCK_GETTIME_NAME "__vdso_clock_gettime"
#define VDSO_SYMBOL_GETCPU_NAME "__vdso_getcpu"
#define VDSO_SYMBOL_GETTIMEOFDAY_NAME "__vdso_gettimeofday"
#define VDSO_SYMBOL_TIME_NAME "__vdso_time"
extern struct vdso_symtable vdso_sym_rt;
extern u64 vdso_pfn;
extern int vdso_init(void);
extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
unsigned long vdso_rt_parked_at, size_t index,
VmaEntry *vmas, size_t nr_vmas);
extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
struct vm_area_list *vma_area_list);
#endif /* __CR_ASM_VDSO_H__ */
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <elf.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "asm/string.h"
#include "asm/types.h"
#include "compiler.h"
#include "syscall.h"
#include "image.h"
#include "vdso.h"
#include "vma.h"
#include "log.h"
#include "bug.h"
#ifdef LOG_PREFIX
# undef LOG_PREFIX
#endif
#define LOG_PREFIX "vdso: "
/*
 * Trampoline written in place over a vdso entry point:
 * "movabs $imm64, %rax; jmp *%rax" followed by 0xcc guard bytes.
 * NOTE(review): these are x86-64 opcodes even though this file was
 * copied for AArch64 support -- confirm per-arch instruction encoding.
 */
typedef struct {
	u16 movabs;	/* opcode of "movabs $imm64, %rax" */
	u64 imm64;	/* absolute target address */
	u16 jmp_rax;	/* opcode of "jmp *%rax" */
	u32 guards;	/* int3 padding */
} __packed jmp_t;
/*
 * Patch every known symbol in the @base_from vdso so that a call into
 * it jumps straight to the matching symbol inside @base_to.
 * Always returns 0 -- the patching itself cannot fail.
 */
int vdso_redirect_calls(void *base_to, void *base_from,
			struct vdso_symtable *to,
			struct vdso_symtable *from)
{
	jmp_t jmp = {
		.movabs = 0xb848,	/* movabs $..., %rax */
		.jmp_rax = 0xe0ff,	/* jmp *%rax */
		.guards = 0xcccccccc,	/* int3 padding */
	};
	unsigned int i;
	for (i = 0; i < ARRAY_SIZE(to->symbols); i++) {
		/*
		 * NOTE(review): only @from is checked for symbol presence;
		 * if @to lacks the symbol, the jump target would become
		 * base_to + VDSO_BAD_ADDR -- confirm both tables always
		 * carry the same symbol set.
		 */
		if (vdso_symbol_empty(&from->symbols[i]))
			continue;
		pr_debug("jmp: %lx/%lx -> %lx/%lx (index %d)\n",
			 (unsigned long)base_from, from->symbols[i].offset,
			 (unsigned long)base_to, to->symbols[i].offset, i);
		/* Write the absolute target, then overwrite the entry point. */
		jmp.imm64 = (unsigned long)base_to + to->symbols[i].offset;
		builtin_memcpy((void *)(base_from + from->symbols[i].offset), &jmp, sizeof(jmp));
	}
	return 0;
}
/*
 * Check whether @ptr lies outside the buffer [@start, @start + @size).
 *
 * Fix: the original used "ptr > end", wrongly accepting a pointer to
 * the one-past-the-end position as in-bounds -- nothing can be read
 * from there, so it must be rejected too ("ptr >= end").
 */
static bool __ptr_oob(void *ptr, void *start, size_t size)
{
	void *end = (void *)((unsigned long)start + size);

	return ptr >= end || ptr < start;
}
/*
 * Classic System V ABI Elf hash function, see the gABI
 * specification for the reference implementation.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	unsigned long hash = 0;

	for (; *name != '\0'; name++) {
		unsigned long top;

		hash = (hash << 4) + *name;
		top = hash & 0xf0000000ul;
		if (top)
			hash ^= top >> 24;
		hash &= ~top;
	}

	return hash;
}
/*
 * Parse the Elf image of a vdso blob at @mem (@size bytes) and resolve
 * the offsets of the symbols we care about into @t->symbols.
 *
 * Returns 0 on success, -EINVAL on malformed input, -EFAULT when some
 * Elf structure points outside of the provided buffer.
 */
int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
{
	Elf64_Phdr *dynamic = NULL, *load = NULL;
	Elf64_Ehdr *ehdr = (void *)mem;
	Elf64_Dyn *dyn_strtab = NULL;
	Elf64_Dyn *dyn_symtab = NULL;
	Elf64_Dyn *dyn_strsz = NULL;
	Elf64_Dyn *dyn_syment = NULL;
	Elf64_Dyn *dyn_hash = NULL;
	Elf64_Word *hash = NULL;
	Elf64_Phdr *phdr;
	Elf64_Dyn *d;
	Elf64_Word *bucket, *chain;
	Elf64_Word nbucket, nchain;
	/*
	 * See the Elf specification for these magic values:
	 * ELF magic, 64-bit class, little-endian, current version.
	 */
	const char elf_ident[] = {
		0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	};
	/* Names to look up, indexed by the VDSO_SYMBOL_* enum. */
	const char *vdso_symbols[VDSO_SYMBOL_MAX] = {
		[VDSO_SYMBOL_CLOCK_GETTIME] = VDSO_SYMBOL_CLOCK_GETTIME_NAME,
		[VDSO_SYMBOL_GETCPU] = VDSO_SYMBOL_GETCPU_NAME,
		[VDSO_SYMBOL_GETTIMEOFDAY] = VDSO_SYMBOL_GETTIMEOFDAY_NAME,
		[VDSO_SYMBOL_TIME] = VDSO_SYMBOL_TIME_NAME,
	};
	char *dynsymbol_names;
	unsigned int i, j, k;
	BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
	pr_debug("Parsing at %lx %lx\n", (long)mem, (long)mem + (long)size);
	/*
	 * Make sure it's a file we support.
	 */
	if (builtin_memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
		pr_err("Elf header magic mismatch\n");
		return -EINVAL;
	}
	/*
	 * We need PT_LOAD and PT_DYNAMIC here. Each once.
	 */
	phdr = (void *)&mem[ehdr->e_phoff];
	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
		if (__ptr_oob(phdr, mem, size))
			goto err_oob;
		switch (phdr->p_type) {
		case PT_DYNAMIC:
			if (dynamic) {
				pr_err("Second PT_DYNAMIC header\n");
				return -EINVAL;
			}
			dynamic = phdr;
			break;
		case PT_LOAD:
			if (load) {
				pr_err("Second PT_LOAD header\n");
				return -EINVAL;
			}
			load = phdr;
			break;
		}
	}
	if (!load || !dynamic) {
		pr_err("One of obligated program headers is missed\n");
		return -EINVAL;
	}
	pr_debug("PT_LOAD p_vaddr: %lx\n", (unsigned long)load->p_vaddr);
	/*
	 * Dynamic section tags should provide us the rest of information
	 * needed. Note that we're interested in a small set of tags.
	 */
	d = (void *)&mem[dynamic->p_offset];
	for (i = 0; i < dynamic->p_filesz / sizeof(*d); i++, d++) {
		if (__ptr_oob(d, mem, size))
			goto err_oob;
		if (d->d_tag == DT_NULL) {
			break;
		} else if (d->d_tag == DT_STRTAB) {
			dyn_strtab = d;
			pr_debug("DT_STRTAB: %p\n", (void *)d->d_un.d_ptr);
		} else if (d->d_tag == DT_SYMTAB) {
			dyn_symtab = d;
			pr_debug("DT_SYMTAB: %p\n", (void *)d->d_un.d_ptr);
		} else if (d->d_tag == DT_STRSZ) {
			dyn_strsz = d;
			pr_debug("DT_STRSZ: %lu\n", (unsigned long)d->d_un.d_val);
		} else if (d->d_tag == DT_SYMENT) {
			dyn_syment = d;
			pr_debug("DT_SYMENT: %lu\n", (unsigned long)d->d_un.d_val);
		} else if (d->d_tag == DT_HASH) {
			dyn_hash = d;
			pr_debug("DT_HASH: %p\n", (void *)d->d_un.d_ptr);
		}
	}
	if (!dyn_strtab || !dyn_symtab || !dyn_strsz || !dyn_syment || !dyn_hash) {
		pr_err("Not all dynamic entries are present\n");
		return -EINVAL;
	}
	/* d_ptr values are link-time vaddrs; rebase them onto @mem. */
	dynsymbol_names = &mem[dyn_strtab->d_un.d_val - load->p_vaddr];
	if (__ptr_oob(dynsymbol_names, mem, size))
		goto err_oob;
	hash = (void *)&mem[(unsigned long)dyn_hash->d_un.d_ptr - (unsigned long)load->p_vaddr];
	if (__ptr_oob(hash, mem, size))
		goto err_oob;
	/* DT_HASH layout: nbucket, nchain, bucket[nbucket], chain[nchain]. */
	nbucket = hash[0];
	nchain = hash[1];
	bucket = &hash[2];
	chain = &hash[nbucket + 2];
	pr_debug("nbucket %lu nchain %lu bucket %p chain %p\n",
		 (long)nbucket, (long)nchain, bucket, chain);
	/* Walk each name's hash chain until the exact string matches. */
	for (i = 0; i < ARRAY_SIZE(vdso_symbols); i++) {
		k = elf_hash((const unsigned char *)vdso_symbols[i]);
		for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) {
			Elf64_Sym *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr];
			char *name;
			sym = &sym[j];
			if (__ptr_oob(sym, mem, size))
				continue;
			/* Only global function symbols can be vdso entry points. */
			if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
			    ELF64_ST_BIND(sym->st_info) != STB_GLOBAL)
				continue;
			name = &dynsymbol_names[sym->st_name];
			if (__ptr_oob(name, mem, size))
				continue;
			if (builtin_strcmp(name, vdso_symbols[i]))
				continue;
			/*
			 * NOTE(review): a fixed 32 bytes are copied from @name,
			 * which may read past the end of the string table --
			 * confirm the tail of the blob is always mapped.
			 */
			builtin_memcpy(t->symbols[i].name, name, sizeof(t->symbols[i].name));
			t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
			break;
		}
	}
	return 0;
err_oob:
	pr_err("Corrupted Elf data\n");
	return -EFAULT;
}
/* Move the @size bytes mapped at @from to the fixed address @to. */
static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
{
	unsigned long new_addr;

	pr_debug("Remap %s %lx -> %lx\n", who, from, to);

	new_addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
	if (new_addr == to)
		return 0;

	pr_err("Unable to remap %lx -> %lx %lx\n", from, to, new_addr);
	return -1;
}
/* Park runtime vDSO in some safe place where it can be accessible from restorer */
int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
{
int ret;
BUG_ON((vdso_vma_size(sym_rt) + vvar_vma_size(sym_rt)) < park_size);
if (sym_rt->vvar_start != VDSO_BAD_ADDR) {
if (sym_rt->vma_start < sym_rt->vvar_start) {
ret = vdso_remap("rt-vdso", sym_rt->vma_start,
park_at, vdso_vma_size(sym_rt));
park_at += vdso_vma_size(sym_rt);
ret |= vdso_remap("rt-vvar", sym_rt->vvar_start,
park_at, vvar_vma_size(sym_rt));
} else {
ret = vdso_remap("rt-vvar", sym_rt->vvar_start,
park_at, vvar_vma_size(sym_rt));
park_at += vvar_vma_size(sym_rt);
ret |= vdso_remap("rt-vdso", sym_rt->vma_start,
park_at, vdso_vma_size(sym_rt));
}
} else
ret = vdso_remap("rt-vdso", sym_rt->vma_start,
park_at, vdso_vma_size(sym_rt));
return ret;
}
/*
 * Restore the vDSO for a dumpee: either remap the parked runtime
 * vdso/vvar in place of the dumpee's (when layouts match exactly),
 * or turn the dumpee's vdso into a proxy whose entry points jump
 * into the parked runtime vdso.
 *
 * @index points into @vmas at the first vma of the vdso/vvar tuple.
 * Returns 0 on success, -1 on error.
 */
int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
		 unsigned long vdso_rt_parked_at, size_t index,
		 VmaEntry *vmas, size_t nr_vmas)
{
	VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
	struct vdso_symtable s = VDSO_SYMTABLE_INIT;
	bool remap_rt = false;
	/*
	 * Figure out which kind of vdso tuple we get: vdso and vvar
	 * may come in either order, and vvar may be absent.
	 */
	if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
		vma_vdso = &vmas[index];
	else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
		vma_vvar = &vmas[index];
	if (index < (nr_vmas - 1)) {
		if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
			vma_vdso = &vmas[index + 1];
		else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
			vma_vvar = &vmas[index + 1];
	}
	if (!vma_vdso) {
		pr_err("Can't find vDSO area in image\n");
		return -1;
	}
	/*
	 * The vDSO mark overwrites the Elf program header of the proxy
	 * vDSO, thus it must never ever be greater in size.
	 */
	BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
	/*
	 * Find symbols in the vDSO zone read from the image.
	 */
	if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
		return -1;
	/*
	 * Proxification strategy
	 *
	 *  - There might be two vDSO zones: vdso code and optionally vvar data
	 *  - To be able to use in-place remapping we need
	 *
	 *    a) Size and order of vDSO zones are to match
	 *    b) Symbols offsets must match
	 *    c) Have same number of vDSO zones
	 */
	if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
		size_t i;
		for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
			if (s.symbols[i].offset != sym_rt->symbols[i].offset)
				break;
		}
		if (i == ARRAY_SIZE(s.symbols)) {
			/*
			 * NOTE(review): when neither the image nor the runtime
			 * has a vvar area, remap_rt stays false and the proxy
			 * path is taken even though sizes/offsets match --
			 * confirm this is intentional.
			 */
			if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
				remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
				if (remap_rt) {
					long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
					long delta_this = vma_vvar->start - vma_vdso->start;
					/* Same sign <=> same vdso/vvar ordering. */
					remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
				}
			}
		}
	}
	pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
		 vma_vdso->start, vma_vdso->end,
		 vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
		 vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
	/*
	 * Easy case -- the vdso from image has same offsets, order and size
	 * as runtime, so we simply remap runtime vdso to dumpee position
	 * without generating any proxy.
	 *
	 * Note we may remap the VVAR area as well, which might not yet have
	 * been mapped by caller code, so the caller must not touch it anymore.
	 */
	if (remap_rt) {
		int ret = 0;
		pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
		/* Free the target addresses first... */
		if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
			pr_err("Failed to unmap %s\n", who);
			return -1;
		}
		if (vma_vvar) {
			if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
				pr_err("Failed to unmap %s\n", who);
				return -1;
			}
		}
		/* ...then move the parked areas there, keeping their order. */
		if (vma_vvar) {
			if (vma_vdso->start < vma_vvar->start) {
				ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
				vdso_rt_parked_at += vdso_vma_size(sym_rt);
				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
			} else {
				ret = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
				vdso_rt_parked_at += vvar_vma_size(sym_rt);
				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
			}
		} else
			ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
		return ret;
	}
	/*
	 * Now the complex case -- we need to proxify calls. We redirect
	 * calls from the dumpee vdso to the runtime vdso, making the
	 * dumpee vdso operate as a proxy vdso.
	 */
	pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
	/*
	 * Don't forget to shift if vvar is before vdso.
	 * (VDSO_BAD_ADDR and VVAR_BAD_ADDR are the same value.)
	 */
	if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
	    sym_rt->vvar_start < sym_rt->vma_start)
		vdso_rt_parked_at += vvar_vma_size(sym_rt);
	if (vdso_redirect_calls((void *)vdso_rt_parked_at,
				(void *)vma_vdso->start,
				sym_rt, &s)) {
		pr_err("Failed to proxify dumpee contents\n");
		return -1;
	}
	/*
	 * Put a special mark into the runtime vdso, so that at the next
	 * checkpoint we can detect this vdso and not dump it, since it is
	 * auto-generated every new session if a proxy is required.
	 */
	sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE);
	vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
	sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT);
	return 0;
}
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <elf.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "asm/types.h"
#include "asm/parasite-syscall.h"
#include "parasite-syscall.h"
#include "parasite.h"
#include "compiler.h"
#include "kerndat.h"
#include "vdso.h"
#include "util.h"
#include "log.h"
#include "mem.h"
#include "vma.h"
#ifdef LOG_PREFIX
# undef LOG_PREFIX
#endif
#define LOG_PREFIX "vdso: "
/* Runtime vDSO symtable of the criu process itself, filled by vdso_init(). */
struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
/* Page frame number backing the runtime vDSO; VDSO_BAD_PFN until resolved. */
u64 vdso_pfn = VDSO_BAD_PFN;
/*
 * The VMAs list might have proxy vdso/vvar areas left over
 * from a previous dump/restore cycle, so we need to detect
 * them and eliminate them from the VMAs list; they will be
 * generated again on restore if needed.
 */
/*
 * Find the dumpee's vDSO/VVAR areas and fix up their status bits.
 *
 * Proxy vdso/vvar areas left over from a previous dump/restore cycle
 * are detected via the in-memory vdso mark, the status of the real
 * (remapped) areas they point to is restored, and the auto-generated
 * proxies are dropped from the VMA list.
 *
 * Fixes: log-message typos "page fram number" -> "page frame number"
 * and "Droppping" -> "Dropping"; logic is unchanged.
 *
 * Returns 0 on success, -1 on error.
 */
int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
			struct vm_area_list *vma_area_list)
{
	unsigned long proxy_vdso_addr = VDSO_BAD_ADDR;
	unsigned long proxy_vvar_addr = VVAR_BAD_ADDR;
	struct vma_area *proxy_vdso_marked = NULL;
	struct vma_area *proxy_vvar_marked = NULL;
	struct parasite_vdso_vma_entry *args;
	struct vma_area *vma;
	int fd, ret = -1;
	off_t off;
	u64 pfn;

	args = parasite_args(ctl, struct parasite_vdso_vma_entry);
	fd = open_proc(pid, "pagemap");
	if (fd < 0)
		return -1;

	list_for_each_entry(vma, &vma_area_list->h, list) {
		/* Only anonymous regular mappings can be a vDSO/VVAR. */
		if (!vma_area_is(vma, VMA_AREA_REGULAR))
			continue;
		if (vma_area_is(vma, VMA_FILE_SHARED) ||
		    vma_area_is(vma, VMA_FILE_PRIVATE))
			continue;
		/*
		 * It might be a VVAR area from a marked vDSO zone; we need
		 * to detect it before the VDSO_PROT test because VVAR_PROT
		 * is a subset of it, but don't "continue" here yet,
		 * sigh... what a mess.
		 */
		BUILD_BUG_ON(!(VDSO_PROT & VVAR_PROT));
		if ((vma->e->prot & VVAR_PROT) == VVAR_PROT) {
			if (proxy_vvar_addr != VVAR_BAD_ADDR &&
			    proxy_vvar_addr == vma->e->start) {
				BUG_ON(proxy_vvar_marked);
				proxy_vvar_marked = vma;
				continue;
			}
		}
		if ((vma->e->prot & VDSO_PROT) != VDSO_PROT)
			continue;
		if (vma->e->start > TASK_SIZE)
			continue;
		if (vma->e->flags & MAP_GROWSDOWN)
			continue;
		/*
		 * We need to poke every potentially marked vma, otherwise
		 * if the task never called vdso functions the page frame
		 * number won't be reported.
		 */
		args->start = vma->e->start;
		args->len = vma_area_len(vma);
		if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
			pr_err("vdso: Parasite failed to poke for mark\n");
			ret = -1;
			goto err;
		}
		/*
		 * Defer handling the marked vdso until we have walked over
		 * all vmas and restored the status of any potentially
		 * remapped vDSO area.
		 */
		if (unlikely(args->is_marked)) {
			if (proxy_vdso_marked) {
				pr_err("Ow! Second vdso mark detected!\n");
				ret = -1;
				goto err;
			}
			proxy_vdso_marked = vma;
			proxy_vdso_addr = args->proxy_vdso_addr;
			proxy_vvar_addr = args->proxy_vvar_addr;
			continue;
		}
		/* Read the pagemap entry backing the first page of the vma. */
		off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
		ret = pread(fd, &pfn, sizeof(pfn), off);
		if (ret < 0 || ret != sizeof(pfn)) {
			pr_perror("Can't read pme for pid %d", pid);
			ret = -1;
			goto err;
		}
		pfn = PME_PFRAME(pfn);
		if (!pfn) {
			pr_err("Unexpected page frame number 0 for pid %d\n", pid);
			ret = -1;
			goto err;
		}
		/*
		 * Setup proper VMA status. Note starting with 3.16
		 * the [vdso]/[vvar] marks are reported correctly
		 * even when they are remapped into a new place,
		 * but only since that particular version of the
		 * kernel!
		 */
		if (pfn == vdso_pfn) {
			if (!vma_area_is(vma, VMA_AREA_VDSO)) {
				pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
					 (long)vma->e->start);
				vma->e->status |= VMA_AREA_VDSO;
			}
		} else {
			if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
				pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
					 (long)vma->e->start);
				vma->e->status &= ~VMA_AREA_VDSO;
			}
		}
	}

	/*
	 * There is a marked vdso; it means such a vdso is auto-generated
	 * and must be dropped from the vma list.
	 */
	if (proxy_vdso_marked) {
		pr_debug("vdso: Found marked at %lx (proxy vDSO at %lx VVAR at %lx)\n",
			 (long)proxy_vdso_marked->e->start,
			 (long)proxy_vdso_addr, (long)proxy_vvar_addr);
		/*
		 * Don't forget to restore the proxy vdso/vvar status, since
		 * it's unknown to the kernel.
		 */
		list_for_each_entry(vma, &vma_area_list->h, list) {
			if (vma->e->start == proxy_vdso_addr) {
				vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO;
				pr_debug("vdso: Restore proxy vDSO status at %lx\n",
					 (long)vma->e->start);
			} else if (vma->e->start == proxy_vvar_addr) {
				vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VVAR;
				pr_debug("vdso: Restore proxy VVAR status at %lx\n",
					 (long)vma->e->start);
			}
		}

		pr_debug("vdso: Dropping marked vdso at %lx\n",
			 (long)proxy_vdso_marked->e->start);
		list_del(&proxy_vdso_marked->list);
		xfree(proxy_vdso_marked);
		vma_area_list->nr--;

		if (proxy_vvar_marked) {
			pr_debug("vdso: Dropping marked vvar at %lx\n",
				 (long)proxy_vvar_marked->e->start);
			list_del(&proxy_vvar_marked->list);
			xfree(proxy_vvar_marked);
			vma_area_list->nr--;
		}
	}
	ret = 0;
err:
	close(fd);
	return ret;
}
/*
 * Parse /proc/self/maps to find criu's own [vdso] (and optional [vvar])
 * areas, record their bounds in @s and resolve the symbol offsets.
 * Returns 0 on success, -1 on error.
 */
static int vdso_fill_self_symtable(struct vdso_symtable *s)
{
	char buf[512];
	int ret = -1;
	FILE *maps;
	*s = (struct vdso_symtable)VDSO_SYMTABLE_INIT;
	maps = fopen("/proc/self/maps", "r");
	if (!maps) {
		pr_perror("Can't open self-vma");
		return -1;
	}
	while (fgets(buf, sizeof(buf), maps)) {
		unsigned long start, end;
		char *has_vdso, *has_vvar;
		/* A maps line is either [vdso] or [vvar], never both. */
		has_vdso = strstr(buf, "[vdso]");
		if (!has_vdso)
			has_vvar = strstr(buf, "[vvar]");
		else
			has_vvar = NULL;
		if (!has_vdso && !has_vvar)
			continue;
		ret = sscanf(buf, "%lx-%lx", &start, &end);
		if (ret != 2) {
			ret = -1;
			pr_err("Can't find vDSO/VVAR bounds\n");
			goto err;
		}
		if (has_vdso) {
			if (s->vma_start != VDSO_BAD_ADDR) {
				pr_err("Got second vDSO entry\n");
				ret = -1;
				goto err;
			}
			s->vma_start = start;
			s->vma_end = end;
			/* The vdso is mapped readable; parse it in place. */
			ret = vdso_fill_symtable((void *)start, end - start, s);
			if (ret)
				goto err;
		} else {
			if (s->vvar_start != VVAR_BAD_ADDR) {
				pr_err("Got second VVAR entry\n");
				ret = -1;
				goto err;
			}
			s->vvar_start = start;
			s->vvar_end = end;
		}
	}
	/*
	 * Validate its structure -- for new vDSO format the
	 * structure must be like
	 *
	 * 7fff1f5fd000-7fff1f5fe000 r-xp 00000000 00:00 0 [vdso]
	 * 7fff1f5fe000-7fff1f600000 r--p 00000000 00:00 0 [vvar]
	 *
	 * The areas may be in reverse order.
	 *
	 * 7fffc3502000-7fffc3504000 r--p 00000000 00:00 0 [vvar]
	 * 7fffc3504000-7fffc3506000 r-xp 00000000 00:00 0 [vdso]
	 *
	 */
	ret = 0;
	if (s->vma_start != VDSO_BAD_ADDR) {
		if (s->vvar_start != VVAR_BAD_ADDR) {
			/* vdso and vvar must be adjacent (either order). */
			if (s->vma_end != s->vvar_start &&
			    s->vvar_end != s->vma_start) {
				ret = -1;
				pr_err("Unexpected rt vDSO area bounds\n");
				goto err;
			}
		}
	} else {
		ret = -1;
		pr_err("Can't find rt vDSO\n");
		goto err;
	}
	pr_debug("rt [vdso] %lx-%lx [vvar] %lx-%lx\n",
		 s->vma_start, s->vma_end,
		 s->vvar_start, s->vvar_end);
err:
	fclose(maps);
	return ret;
}
/* Discover the runtime vDSO layout and the pfn backing it. */
int vdso_init(void)
{
	int ret = -1;

	if (!vdso_fill_self_symtable(&vdso_sym_rt))
		ret = vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn);

	return ret;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment