Commit fd58e62b authored by Andrey Vagin's avatar Andrey Vagin Committed by Pavel Emelyanov

mm: map grow-down VMA-s with guard pages

In /proc/pid/maps grow-down VMA-s are shown without guard pages, but
sometime these "guard" pages can contain usefull data. For example if
a real guard page has been remmaped by another VMA. Let's call such
pages as fake guard pages.

So when a grow-down VMA is mmaped on restore, it should be mapped with
one more guard page to restore content of the fake guard page.

https://bugzilla.openvz.org/show_bug.cgi?id=2715Signed-off-by: 's avatarAndrey Vagin <avagin@openvz.org>
Signed-off-by: 's avatarPavel Emelyanov <xemul@parallels.com>
parent 8f18db5f
...@@ -207,7 +207,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr, ...@@ -207,7 +207,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
{ {
int ret; int ret;
void *addr, *paddr = NULL; void *addr, *paddr = NULL;
unsigned long nr_pages; unsigned long nr_pages, size;
struct vma_area *p = *pvma; struct vma_area *p = *pvma;
if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE)) { if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE)) {
...@@ -242,6 +242,17 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr, ...@@ -242,6 +242,17 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
*pvma = p; *pvma = p;
/*
* A grow-down VMA has a guard page, which protect a VMA below it.
* So one more page is mapped here to restore content of the first page
*/
if (vma->vma.flags & MAP_GROWSDOWN) {
vma->vma.start -= PAGE_SIZE;
if (paddr)
paddr -= PAGE_SIZE;
}
size = vma_entry_len(&vma->vma);
if (paddr == NULL) { if (paddr == NULL) {
/* /*
* The respective memory area was NOT found in the parent. * The respective memory area was NOT found in the parent.
...@@ -250,7 +261,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr, ...@@ -250,7 +261,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n", pr_info("Map 0x%016"PRIx64"-0x%016"PRIx64" 0x%016"PRIx64" vma\n",
vma->vma.start, vma->vma.end, vma->vma.pgoff); vma->vma.start, vma->vma.end, vma->vma.pgoff);
addr = mmap(tgt_addr, vma_entry_len(&vma->vma), addr = mmap(tgt_addr, size,
vma->vma.prot | PROT_WRITE, vma->vma.prot | PROT_WRITE,
vma->vma.flags | MAP_FIXED, vma->vma.flags | MAP_FIXED,
vma->vma.fd, vma->vma.pgoff); vma->vma.fd, vma->vma.pgoff);
...@@ -266,7 +277,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr, ...@@ -266,7 +277,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
*/ */
vma->ppage_bitmap = p->page_bitmap; vma->ppage_bitmap = p->page_bitmap;
addr = mremap(paddr, vma_area_len(vma), vma_area_len(vma), addr = mremap(paddr, size, size,
MREMAP_FIXED | MREMAP_MAYMOVE, tgt_addr); MREMAP_FIXED | MREMAP_MAYMOVE, tgt_addr);
if (addr != tgt_addr) { if (addr != tgt_addr) {
pr_perror("Unable to remap a private vma"); pr_perror("Unable to remap a private vma");
...@@ -279,10 +290,15 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr, ...@@ -279,10 +290,15 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,
pr_debug("\tpremap 0x%016"PRIx64"-0x%016"PRIx64" -> %016lx\n", pr_debug("\tpremap 0x%016"PRIx64"-0x%016"PRIx64" -> %016lx\n",
vma->vma.start, vma->vma.end, (unsigned long)addr); vma->vma.start, vma->vma.end, (unsigned long)addr);
if (vma->vma.flags & MAP_GROWSDOWN) { /* Skip gurad page */
vma->vma.start += PAGE_SIZE;
vma_premmaped_start(&vma->vma) += PAGE_SIZE;
}
if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE)) if (vma_entry_is(&vma->vma, VMA_FILE_PRIVATE))
close(vma->vma.fd); close(vma->vma.fd);
return 0; return size;
} }
static int restore_priv_vma_content(pid_t pid) static int restore_priv_vma_content(pid_t pid)
...@@ -475,8 +491,11 @@ static int prepare_mappings(int pid) ...@@ -475,8 +491,11 @@ static int prepare_mappings(int pid)
break; break;
} }
if (vma_priv(&vma->vma)) if (vma_priv(&vma->vma)) {
rst_vmas.priv_size += vma_area_len(vma); rst_vmas.priv_size += vma_area_len(vma);
if (vma->vma.flags & MAP_GROWSDOWN)
rst_vmas.priv_size += PAGE_SIZE;
}
} }
close(fd); close(fd);
...@@ -512,10 +531,10 @@ static int prepare_mappings(int pid) ...@@ -512,10 +531,10 @@ static int prepare_mappings(int pid)
if (ret < 0) if (ret < 0)
break; break;
addr += vma_area_len(vma); addr += ret;
} }
if (ret == 0) if (ret >= 0)
ret = restore_priv_vma_content(pid); ret = restore_priv_vma_content(pid);
out: out:
...@@ -536,6 +555,31 @@ out: ...@@ -536,6 +555,31 @@ out:
return ret; return ret;
} }
/*
* A gard page must be unmapped after restoring content and
* forking children to restore COW memory.
*/
static int unmap_guard_pages()
{
struct vma_area *vma;
list_for_each_entry(vma, &rst_vmas.h, list) {
if (!vma_priv(&vma->vma))
continue;
if (vma->vma.flags & MAP_GROWSDOWN) {
void *addr = (void *) vma_premmaped_start(&vma->vma);
if (munmap(addr - PAGE_SIZE, PAGE_SIZE)) {
pr_perror("Can't unmap guard page\n");
return -1;
}
}
}
return 0;
}
static int open_vmas(int pid) static int open_vmas(int pid)
{ {
struct vma_area *vma; struct vma_area *vma;
...@@ -1184,6 +1228,8 @@ static int restore_task_with_children(void *_arg) ...@@ -1184,6 +1228,8 @@ static int restore_task_with_children(void *_arg)
if (create_children_and_session()) if (create_children_and_session())
exit(1); exit(1);
if (unmap_guard_pages())
exit(1);
/* /*
* Unlike sessions, process groups (a.k.a. pgids) can be joined * Unlike sessions, process groups (a.k.a. pgids) can be joined
* by any task, provided the task with pid == pgid (group leader) * by any task, provided the task with pid == pgid (group leader)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment