Commit 71f3f7e6 authored by Andrey Vagin's avatar Andrey Vagin Committed by Pavel Emelyanov

restorer: remap private vmas to correct places (v3)

All private vmas are placed in a premmapped region and
they are sorted by start addresses, so they should be shifted apart.

There is one more problem: when the temporary and target regions overlap,
mremap() cannot move the vma directly, so in such cases the vma is first
remapped away to a non-overlapping address and then remapped onto its target place.

v2: fixed according to Pavel's comments
v3: add a huge comment with pictures
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent f3e322a1
...@@ -1588,6 +1588,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -1588,6 +1588,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
mem += self_vmas_len; mem += self_vmas_len;
task_args->tgt_vmas = vma_list_remap(mem, vmas_len, &rst_vma_list); task_args->tgt_vmas = vma_list_remap(mem, vmas_len, &rst_vma_list);
task_args->nr_vmas = rst_nr_vmas;
task_args->premmapped_addr = (unsigned long) premmapped_addr; task_args->premmapped_addr = (unsigned long) premmapped_addr;
task_args->premmapped_len = premmapped_len; task_args->premmapped_len = premmapped_len;
if (!task_args->tgt_vmas) if (!task_args->tgt_vmas)
......
...@@ -99,6 +99,7 @@ struct task_restore_core_args { ...@@ -99,6 +99,7 @@ struct task_restore_core_args {
struct task_entries *task_entries; struct task_entries *task_entries;
VmaEntry *self_vmas; VmaEntry *self_vmas;
VmaEntry *tgt_vmas; VmaEntry *tgt_vmas;
unsigned int nr_vmas;
unsigned long premmapped_addr; unsigned long premmapped_addr;
unsigned long premmapped_len; unsigned long premmapped_len;
rt_sigaction_t sigchld_act; rt_sigaction_t sigchld_act;
......
...@@ -321,6 +321,82 @@ static void rst_tcp_socks_all(int *arr, int size) ...@@ -321,6 +321,82 @@ static void rst_tcp_socks_all(int *arr, int size)
sys_munmap(arr, size); sys_munmap(arr, size);
} }
/*
 * Move the premmapped vma at @src to its final address @dst (@len bytes).
 *
 * Uses mremap(MREMAP_FIXED), which fails if the source and target ranges
 * overlap; overlapping moves are handled with the guard-page trick pictured
 * below. Returns 0 on success, -1 on any mmap/mremap failure.
 *
 * NOTE(review): sys_mmap/sys_mremap appear to be the restorer's raw syscall
 * wrappers — error reporting assumes they return the raw kernel value.
 */
static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long guard = 0, tmp;
pr_info("Remap %lx->%lx len %lx\n", src, dst, len);
/*
 * Overlap detection via unsigned wraparound: exactly one of these
 * differences is small when the ranges overlap.  If dst < src and they
 * overlap, the non-overlapped border of the target is its first page;
 * if src < dst, it is the target's last page.  guard == 0 means no
 * overlap (a vma never starts at address 0, so 0 is a safe sentinel).
 */
if (src - dst < len)
guard = dst;
else if (dst - src < len)
guard = dst + len - PAGE_SIZE;
/* Already in place — nothing to do. */
if (src == dst)
return 0;
if (guard != 0) {
/*
 * mremap() returns an error if the target and source vma-s
 * overlap. In this case the source vma is remapped to
 * a temporary place and then remapped to the target address.
 * Here is one hack to find a non-overlapped temporary place.
 *
 * 1. initial placement. We need to move src -> tgt.
 * | |+++++src+++++|
 * |-----tgt-----| |
 *
 * 2. map a guard page at the non-overlapped border of the target vma.
 * | |+++++src+++++|
 * |G|----tgt----| |
 *
 * 3. remap src to any other place.
 * G prevents src from being remapped onto tgt again
 * | |-------------| -> |+++++src+++++|
 * |G|---tgt-----| |
 *
 * 4. remap src to tgt, no overlapping any longer
 * |+++++src+++++| <---- |-------------|
 * |G|---tgt-----| |
 *
 * The guard page itself is not unmapped explicitly: the final
 * MREMAP_FIXED move to dst replaces whatever is mapped there.
 */
unsigned long addr;
/* Map guard page (step 2) */
tmp = sys_mmap((void *) guard, PAGE_SIZE, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if (tmp != guard) {
pr_err("Unable to map a guard page %lx (%lx)\n", guard, tmp);
return -1;
}
/* Move src to non-overlapping place (step 3) */
/*
 * The kernel picks a free range (NULL hint); the guard page keeps
 * it from landing inside [dst, dst + len).
 */
addr = sys_mmap(NULL, len, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if (addr == (unsigned long) MAP_FAILED) {
pr_err("Unable to reserve memory (%lx)\n", addr);
return -1;
}
tmp = sys_mremap(src, len, len,
MREMAP_MAYMOVE | MREMAP_FIXED, addr);
if (tmp != addr) {
pr_err("Unable to remap %lx -> %lx (%lx)\n", src, addr, tmp);
return -1;
}
/* From here on src no longer overlaps dst. */
src = addr;
}
/* Final move onto the target address (step 4). */
tmp = sys_mremap(src, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
if (tmp != dst) {
pr_err("Unable to remap %lx -> %lx\n", src, dst);
return -1;
}
return 0;
}
/* /*
* The main routine to restore task via sigreturn. * The main routine to restore task via sigreturn.
* This one is very special, we never return there * This one is very special, we never return there
...@@ -384,6 +460,39 @@ long __export_restore_task(struct task_restore_core_args *args) ...@@ -384,6 +460,39 @@ long __export_restore_task(struct task_restore_core_args *args)
sys_munmap(args->self_vmas, sys_munmap(args->self_vmas,
((void *)(vma_entry + 1) - ((void *)args->self_vmas))); ((void *)(vma_entry + 1) - ((void *)args->self_vmas)));
/* Shift private vma-s to the left */
for (vma_entry = args->tgt_vmas; vma_entry->start != 0; vma_entry++) {
if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
continue;
if (!vma_priv(vma_entry))
continue;
if (vma_entry->start > vma_entry->shmid)
break;
if (vma_remap(vma_premmaped_start(vma_entry),
vma_entry->start, vma_entry_len(vma_entry)))
goto core_restore_end;
}
/* Shift private vma-s to the right */
for (vma_entry = args->tgt_vmas + args->nr_vmas -1;
vma_entry >= args->tgt_vmas; vma_entry--) {
if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
continue;
if (!vma_priv(vma_entry))
continue;
if (vma_entry->start < vma_entry->shmid)
break;
if (vma_remap(vma_premmaped_start(vma_entry),
vma_entry->start, vma_entry_len(vma_entry)))
goto core_restore_end;
}
/* /*
* OK, lets try to map new one. * OK, lets try to map new one.
*/ */
...@@ -391,6 +500,9 @@ long __export_restore_task(struct task_restore_core_args *args) ...@@ -391,6 +500,9 @@ long __export_restore_task(struct task_restore_core_args *args)
if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR)) if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
continue; continue;
if (vma_priv(vma_entry))
continue;
va = restore_mapping(vma_entry); va = restore_mapping(vma_entry);
if (va != vma_entry->start) { if (va != vma_entry->start) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment