Commit 99e0a0cc authored by Christopher Covington, committed by Pavel Emelyanov

Copy x86 VDSO code to AArch64

While it duplicates hundreds of lines of code, this is the
short-term strategy Cyrill and I have agreed to for supporting
VDSOs across multiple architectures [1]. With a better
understanding of where things differ per architecture, or even
improved consolidation in the kernel, we can hopefully move to
a more shared implementation in the future.

1. http://lists.openvz.org/pipermail/criu/2014-August/015218.html

Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 0628118c
#ifndef __CR_ASM_VDSO_H__
#define __CR_ASM_VDSO_H__
#include <sys/types.h>
#include "asm/int.h"
#include "protobuf/vma.pb-c.h"
struct parasite_ctl;
struct vm_area_list;
#define VDSO_PROT (PROT_READ | PROT_EXEC)
#define VVAR_PROT (PROT_READ)
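/* The [vdso] area is mapped read/exec while [vvar] is read-only */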
#define VDSO_BAD_ADDR (-1ul)
#define VVAR_BAD_ADDR VDSO_BAD_ADDR
#define VDSO_BAD_PFN (-1ull)
#define VVAR_BAD_PFN VDSO_BAD_PFN
struct vdso_symbol {
char name[32];
unsigned long offset;
};
#define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, }
/* Check whether a symtable slot is still empty (no symbol found) */
static inline bool vdso_symbol_empty(struct vdso_symbol *s)
{
return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
}
/*
 * The minimal set of symbols we need
 * to support at the moment.
 */
enum {
VDSO_SYMBOL_CLOCK_GETTIME,
VDSO_SYMBOL_GETCPU,
VDSO_SYMBOL_GETTIMEOFDAY,
VDSO_SYMBOL_TIME,
VDSO_SYMBOL_MAX
};
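/*
 * Bounds of the [vdso] and [vvar] mappings together with the
 * offsets of the symbols we track inside the vdso image.
 */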
struct vdso_symtable {
unsigned long vma_start;
unsigned long vma_end;
unsigned long vvar_start;
unsigned long vvar_end;
struct vdso_symbol symbols[VDSO_SYMBOL_MAX];
};
#define VDSO_SYMTABLE_INIT \
{ \
.vma_start = VDSO_BAD_ADDR, \
.vma_end = VDSO_BAD_ADDR, \
.vvar_start = VVAR_BAD_ADDR, \
.vvar_end = VVAR_BAD_ADDR, \
.symbols = { \
[0 ... VDSO_SYMBOL_MAX - 1] = \
(struct vdso_symbol)VDSO_SYMBOL_INIT, \
}, \
}
/* Size of VMA associated with vdso */
static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
{
return t->vma_end - t->vma_start;
}
static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
{
return t->vvar_end - t->vvar_start;
}
/*
 * A special mark which allows us to identify the runtime vdso that
 * calls from the proxy vdso are redirected to. The mark is usually
 * placed at the start of the vdso area, where the Elf header lives.
 * Since such a runtime vdso is solely used by the proxy and nobody
 * else is supposed to access it, it is more or less safe to overwrite
 * the Elf header with @signature and @proxy_vdso_addr.
 *
 * The @proxy_vdso_addr deserves a few comments. When we redirect the
 * calls from the proxy to the runtime vdso, on the next checkpoint it
 * won't be possible to tell which VMA is the proxy, so we save its
 * address in this member.
 */
struct vdso_mark {
u64 signature;
unsigned long proxy_vdso_addr;
unsigned long version;
/*
 * With the new vDSO format the VVAR area address is
 * also needed, so we can discover where it lives
 * without relying on procfs output.
 */
unsigned long proxy_vvar_addr;
};
#define VDSO_MARK_SIGNATURE (0x6f73647675697263ULL) /* Magic number (criuvdso) */
#define VDSO_MARK_SIGNATURE_V2 (0x4f53447675697263ULL) /* Magic number (criuvDSO) */
#define VDSO_MARK_CUR_VERSION (2)
static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
{
struct vdso_mark *m = where;
m->signature = VDSO_MARK_SIGNATURE_V2;
m->proxy_vdso_addr = proxy_vdso_addr;
m->version = VDSO_MARK_CUR_VERSION;
m->proxy_vvar_addr = proxy_vvar_addr;
}
static inline bool is_vdso_mark(void *addr)
{
struct vdso_mark *m = addr;
if (m->signature == VDSO_MARK_SIGNATURE_V2) {
/*
* New format
*/
return true;
} else if (m->signature == VDSO_MARK_SIGNATURE) {
/*
* Old format -- simply extend the mark up
* to the version we support.
*/
vdso_put_mark(m, m->proxy_vdso_addr, VVAR_BAD_ADDR);
return true;
}
return false;
}
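/*
 * A minimal usage sketch (illustrative only, the names below are
 * hypothetical): on restore the runtime vdso parked at @park_at
 * would get its Elf header overwritten with the mark, e.g.
 *
 *	vdso_put_mark((void *)park_at, proxy_vdso_start, proxy_vvar_start);
 *
 * and on the next dump is_vdso_mark() both recognizes such an area
 * and upgrades an old-format mark in place, filling the VVAR address
 * with VVAR_BAD_ADDR.
 */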
#define VDSO_SYMBOL_CLOCK_GETTIME_NAME "__vdso_clock_gettime"
#define VDSO_SYMBOL_GETCPU_NAME "__vdso_getcpu"
#define VDSO_SYMBOL_GETTIMEOFDAY_NAME "__vdso_gettimeofday"
#define VDSO_SYMBOL_TIME_NAME "__vdso_time"
extern struct vdso_symtable vdso_sym_rt;
extern u64 vdso_pfn;
extern int vdso_init(void);
extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
unsigned long vdso_rt_parked_at, size_t index,
VmaEntry *vmas, size_t nr_vmas);
extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
struct vm_area_list *vma_area_list);
#endif /* __CR_ASM_VDSO_H__ */
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <elf.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include "asm/types.h"
#include "asm/parasite-syscall.h"
#include "parasite-syscall.h"
#include "parasite.h"
#include "compiler.h"
#include "kerndat.h"
#include "vdso.h"
#include "util.h"
#include "log.h"
#include "mem.h"
#include "vma.h"
#ifdef LOG_PREFIX
# undef LOG_PREFIX
#endif
#define LOG_PREFIX "vdso: "
struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
u64 vdso_pfn = VDSO_BAD_PFN;
/*
 * The VMAs list might contain proxy vdso/vvar areas left over
 * from a previous dump/restore cycle, so we need to detect them
 * and eliminate them from the VMAs list; they will be generated
 * again on restore if needed.
 */
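/*
 * A short summary of the walk below: (1) every candidate vma is
 * poked through the parasite to check for a vdso mark, (2) unmarked
 * areas have their page frame number compared against criu's own
 * vdso to set or drop the VMA_AREA_VDSO status, and (3) a marked
 * (previously generated) vdso/vvar pair is unlinked from the list
 * while the proxy areas it points to get their status restored.
 */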
int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
struct vm_area_list *vma_area_list)
{
unsigned long proxy_vdso_addr = VDSO_BAD_ADDR;
unsigned long proxy_vvar_addr = VVAR_BAD_ADDR;
struct vma_area *proxy_vdso_marked = NULL;
struct vma_area *proxy_vvar_marked = NULL;
struct parasite_vdso_vma_entry *args;
struct vma_area *vma;
int fd, ret = -1;
off_t off;
u64 pfn;
args = parasite_args(ctl, struct parasite_vdso_vma_entry);
fd = open_proc(pid, "pagemap");
if (fd < 0)
return -1;
list_for_each_entry(vma, &vma_area_list->h, list) {
if (!vma_area_is(vma, VMA_AREA_REGULAR))
continue;
if (vma_area_is(vma, VMA_FILE_SHARED) ||
vma_area_is(vma, VMA_FILE_PRIVATE))
continue;
/*
 * This might be a VVAR area belonging to a marked
 * vDSO zone. We have to check for it before the
 * VDSO_PROT test, because VVAR_PROT is a subset of
 * VDSO_PROT, but we must not blindly continue on a
 * mere protection match either.
 * Sigh... what a mess.
 */
BUILD_BUG_ON(!(VDSO_PROT & VVAR_PROT));
if ((vma->e->prot & VVAR_PROT) == VVAR_PROT) {
if (proxy_vvar_addr != VVAR_BAD_ADDR &&
proxy_vvar_addr == vma->e->start) {
BUG_ON(proxy_vvar_marked);
proxy_vvar_marked = vma;
continue;
}
}
if ((vma->e->prot & VDSO_PROT) != VDSO_PROT)
continue;
if (vma->e->start > TASK_SIZE)
continue;
if (vma->e->flags & MAP_GROWSDOWN)
continue;
/*
 * We need to poke every potentially marked vma;
 * otherwise, if the task never called any vdso
 * function, the page frame number won't be reported.
 */
args->start = vma->e->start;
args->len = vma_area_len(vma);
if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
pr_err("vdso: Parasite failed to poke for mark\n");
ret = -1;
goto err;
}
/*
 * Defer handling of a marked vdso until we have walked
 * over all vmas, then restore the status of the
 * potentially remapped vDSO area.
 */
if (unlikely(args->is_marked)) {
if (proxy_vdso_marked) {
pr_err("Ow! Second vdso mark detected!\n");
ret = -1;
goto err;
}
proxy_vdso_marked = vma;
proxy_vdso_addr = args->proxy_vdso_addr;
proxy_vvar_addr = args->proxy_vvar_addr;
continue;
}
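/*
 * /proc/<pid>/pagemap keeps one 64-bit entry per virtual page,
 * so the entry for this vma sits at (start / PAGE_SIZE) * sizeof(u64);
 * PME_PFRAME() extracts the page frame number from it.
 */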
off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
ret = pread(fd, &pfn, sizeof(pfn), off);
if (ret < 0 || ret != sizeof(pfn)) {
pr_perror("Can't read pme for pid %d", pid);
ret = -1;
goto err;
}
pfn = PME_PFRAME(pfn);
if (!pfn) {
pr_err("Unexpected page fram number 0 for pid %d\n", pid);
ret = -1;
goto err;
}
/*
 * Set up the proper VMA status. Note that starting with
 * kernel 3.16 the [vdso]/[vvar] marks are reported
 * correctly even when the areas are remapped to a new
 * place, but only since that particular kernel
 * version!
 */
if (pfn == vdso_pfn) {
if (!vma_area_is(vma, VMA_AREA_VDSO)) {
pr_debug("vdso: Restore vDSO status by pfn at %lx\n",
(long)vma->e->start);
vma->e->status |= VMA_AREA_VDSO;
}
} else {
if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
pr_debug("vdso: Drop mishinted vDSO status at %lx\n",
(long)vma->e->start);
vma->e->status &= ~VMA_AREA_VDSO;
}
}
}
/*
 * If there is a marked vdso, it means that vdso was autogenerated
 * (on a previous restore) and must be dropped from the vma list.
 */
if (proxy_vdso_marked) {
pr_debug("vdso: Found marked at %lx (proxy vDSO at %lx VVAR at %lx)\n",
(long)proxy_vdso_marked->e->start,
(long)proxy_vdso_addr, (long)proxy_vvar_addr);
/*
* Don't forget to restore the proxy vdso/vvar status, since
* it's unknown to the kernel.
*/
list_for_each_entry(vma, &vma_area_list->h, list) {
if (vma->e->start == proxy_vdso_addr) {
vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO;
pr_debug("vdso: Restore proxy vDSO status at %lx\n",
(long)vma->e->start);
} else if (vma->e->start == proxy_vvar_addr) {
vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VVAR;
pr_debug("vdso: Restore proxy VVAR status at %lx\n",
(long)vma->e->start);
}
}
pr_debug("vdso: Droppping marked vdso at %lx\n",
(long)proxy_vdso_marked->e->start);
list_del(&proxy_vdso_marked->list);
xfree(proxy_vdso_marked);
vma_area_list->nr--;
if (proxy_vvar_marked) {
pr_debug("vdso: Droppping marked vvar at %lx\n",
(long)proxy_vvar_marked->e->start);
list_del(&proxy_vvar_marked->list);
xfree(proxy_vvar_marked);
vma_area_list->nr--;
}
}
ret = 0;
err:
close(fd);
return ret;
}
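/*
 * Parse /proc/self/maps to find criu's own [vdso] and [vvar] areas
 * and fill the runtime symbol table from the live vdso image.
 */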
static int vdso_fill_self_symtable(struct vdso_symtable *s)
{
char buf[512];
int ret = -1;
FILE *maps;
*s = (struct vdso_symtable)VDSO_SYMTABLE_INIT;
maps = fopen("/proc/self/maps", "r");
if (!maps) {
pr_perror("Can't open self-vma");
return -1;
}
while (fgets(buf, sizeof(buf), maps)) {
unsigned long start, end;
char *has_vdso, *has_vvar;
has_vdso = strstr(buf, "[vdso]");
if (!has_vdso)
has_vvar = strstr(buf, "[vvar]");
else
has_vvar = NULL;
if (!has_vdso && !has_vvar)
continue;
ret = sscanf(buf, "%lx-%lx", &start, &end);
if (ret != 2) {
ret = -1;
pr_err("Can't find vDSO/VVAR bounds\n");
goto err;
}
if (has_vdso) {
if (s->vma_start != VDSO_BAD_ADDR) {
pr_err("Got second vDSO entry\n");
ret = -1;
goto err;
}
s->vma_start = start;
s->vma_end = end;
ret = vdso_fill_symtable((void *)start, end - start, s);
if (ret)
goto err;
} else {
if (s->vvar_start != VVAR_BAD_ADDR) {
pr_err("Got second VVAR entry\n");
ret = -1;
goto err;
}
s->vvar_start = start;
s->vvar_end = end;
}
}
/*
 * Validate the layout: for the new vDSO format it
 * must look like
 *
 * 7fff1f5fd000-7fff1f5fe000 r-xp 00000000 00:00 0 [vdso]
 * 7fff1f5fe000-7fff1f600000 r--p 00000000 00:00 0 [vvar]
 *
 * though the areas may appear in reverse order:
 *
 * 7fffc3502000-7fffc3504000 r--p 00000000 00:00 0 [vvar]
 * 7fffc3504000-7fffc3506000 r-xp 00000000 00:00 0 [vdso]
 */
ret = 0;
if (s->vma_start != VDSO_BAD_ADDR) {
if (s->vvar_start != VVAR_BAD_ADDR) {
if (s->vma_end != s->vvar_start &&
s->vvar_end != s->vma_start) {
ret = -1;
pr_err("Unexpected rt vDSO area bounds\n");
goto err;
}
}
} else {
ret = -1;
pr_err("Can't find rt vDSO\n");
goto err;
}
pr_debug("rt [vdso] %lx-%lx [vvar] %lx-%lx\n",
s->vma_start, s->vma_end,
s->vvar_start, s->vvar_end);
err:
fclose(maps);
return ret;
}
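/*
 * Cache the runtime vdso layout and the page frame number of its
 * first page; parasite_fixup_vdso() compares task pfns against
 * vdso_pfn later on.
 */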
int vdso_init(void)
{
if (vdso_fill_self_symtable(&vdso_sym_rt))
return -1;
return vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn);
}