Commit 901c8754 authored by Andrey Vagin, committed by Pavel Emelyanov

shmem: use memfd_create() to restore shared memory regions (v2)

/proc/PID/map_files are protected by the global CAP_SYS_ADMIN, so we
need to avoid using them to support user namespaces.

We are going to use memfd_create() to get the first file descriptor and
then all other processes will be able to open it via /proc/PID/fd/X.

In this patch memfd_create() is used to get a file descriptor, which is
associated with a shared memory region.

If memfd_create() isn't supported, the old scheme will be used.

v2: don't set a name for memfd. It isn't required.
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 2c65748f
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "shmem.h" #include "shmem.h"
#include "image.h" #include "image.h"
#include "cr_options.h" #include "cr_options.h"
#include "kerndat.h"
#include "page-pipe.h" #include "page-pipe.h"
#include "page-xfer.h" #include "page-xfer.h"
#include "rst-malloc.h" #include "rst-malloc.h"
...@@ -175,8 +176,9 @@ err_unmap: ...@@ -175,8 +176,9 @@ err_unmap:
int get_shmem_fd(int pid, VmaEntry *vi) int get_shmem_fd(int pid, VmaEntry *vi)
{ {
struct shmem_info *si; struct shmem_info *si;
void *addr; void *addr = MAP_FAILED;
int f; int f = -1;
int flags;
si = find_shmem_by_id(vi->shmid); si = find_shmem_by_id(vi->shmid);
pr_info("Search for 0x%016"PRIx64" shmem 0x%"PRIx64" %p/%d\n", vi->start, vi->shmid, si, si ? si->pid : -1); pr_info("Search for 0x%016"PRIx64" shmem 0x%"PRIx64" %p/%d\n", vi->start, vi->shmid, si, si ? si->pid : -1);
...@@ -191,6 +193,22 @@ int get_shmem_fd(int pid, VmaEntry *vi) ...@@ -191,6 +193,22 @@ int get_shmem_fd(int pid, VmaEntry *vi)
if (si->fd != -1) if (si->fd != -1)
return dup(si->fd); return dup(si->fd);
flags = MAP_SHARED;
if (memfd_is_supported) {
f = sys_memfd_create("", 0);
if (f < 0) {
pr_perror("Unable to create memfd");
goto err;
}
if (ftruncate(f, si->size)) {
pr_perror("Unable to truncate memfd");
goto err;
}
flags |= MAP_FILE;
} else
flags |= MAP_ANONYMOUS;
/* /*
* The following hack solves problems: * The following hack solves problems:
* vi->pgoff may be not zero in a target process. * vi->pgoff may be not zero in a target process.
...@@ -198,23 +216,25 @@ int get_shmem_fd(int pid, VmaEntry *vi) ...@@ -198,23 +216,25 @@ int get_shmem_fd(int pid, VmaEntry *vi)
* The restorer doesn't have snprintf. * The restorer doesn't have snprintf.
* Here is a good place to restore content * Here is a good place to restore content
*/ */
addr = mmap(NULL, si->size, addr = mmap(NULL, si->size, PROT_WRITE | PROT_READ, flags, f, 0);
PROT_WRITE | PROT_READ,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED) { if (addr == MAP_FAILED) {
pr_err("Can't mmap shmid=0x%"PRIx64" size=%ld\n", pr_err("Can't mmap shmid=0x%"PRIx64" size=%ld\n",
vi->shmid, si->size); vi->shmid, si->size);
return -1; goto err;
} }
if (restore_shmem_content(addr, si) < 0) { if (restore_shmem_content(addr, si) < 0) {
pr_err("Can't restore shmem content\n"); pr_err("Can't restore shmem content\n");
return -1; goto err;
} }
if (f == -1) {
f = open_proc_rw(getpid(), "map_files/%lx-%lx", f = open_proc_rw(getpid(), "map_files/%lx-%lx",
(unsigned long) addr, (unsigned long) addr,
(unsigned long) addr + si->size); (unsigned long) addr + si->size);
if (f < 0)
goto err;
}
munmap(addr, si->size); munmap(addr, si->size);
si->fd = f; si->fd = f;
...@@ -228,6 +248,11 @@ int get_shmem_fd(int pid, VmaEntry *vi) ...@@ -228,6 +248,11 @@ int get_shmem_fd(int pid, VmaEntry *vi)
futex_wait_until(&si->lock, si->count - si->self_count + 1); futex_wait_until(&si->lock, si->count - si->self_count + 1);
return f; return f;
err:
if (addr != MAP_FAILED)
munmap(addr, si->size);
close_safe(&f);
return -1;
} }
struct shmem_info_dump { struct shmem_info_dump {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment