Commit 2c65748f authored by Andrey Vagin, committed by Pavel Emelyanov

shmem: rework getting file descriptors for shared memory regions (v2)

Entries in /proc/PID/map_files are protected by the global CAP_SYS_ADMIN,
so we need to avoid using them in order to support user namespaces.

We are going to use memfd_create() to get the first file descriptor, and
then all other processes will be able to open it via /proc/PID/fd/X.

This patch reworks slave processes to not use map_files.

v2: add more comments
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent de71c480
...@@ -18,7 +18,23 @@ struct shmem_info { ...@@ -18,7 +18,23 @@ struct shmem_info {
unsigned long size; unsigned long size;
int pid; int pid;
int fd; int fd;
/*
* 0. lock is initialized to zero
* 1. the master opens a descriptor and sets lock to 1
* 2. slaves open their descriptors and increment lock
* 3. the master waits for all slaves on lock. After that
* it can close the descriptor.
*/
futex_t lock; futex_t lock;
/*
* The problem here is that we don't know which process will restore
* a region. Each time we find a process with a smaller pid,
* we reset self_count, so we can't have only one counter.
*/
int count; /* the number of regions */
int self_count; /* the number of regions, which belongs to "pid" */
}; };
struct _VmaEntry; struct _VmaEntry;
......
...@@ -881,16 +881,6 @@ long __export_restore_task(struct task_restore_args *args) ...@@ -881,16 +881,6 @@ long __export_restore_task(struct task_restore_args *args)
if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
continue; continue;
if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
struct shmem_info *entry;
entry = find_shmem(args->shmems, args->nr_shmems,
vma_entry->shmid);
if (entry && entry->pid == my_pid &&
entry->start == vma_entry->start)
futex_set_and_wake(&entry->lock, 1);
}
if (vma_entry->prot & PROT_WRITE) if (vma_entry->prot & PROT_WRITE)
continue; continue;
......
...@@ -55,6 +55,7 @@ int collect_shmem(int pid, VmaEntry *vi) ...@@ -55,6 +55,7 @@ int collect_shmem(int pid, VmaEntry *vi)
if (si->size < size) if (si->size < size)
si->size = size; si->size = size;
si->count++;
/* /*
* Only the shared mapping with a lowest * Only the shared mapping with a lowest
...@@ -62,12 +63,17 @@ int collect_shmem(int pid, VmaEntry *vi) ...@@ -62,12 +63,17 @@ int collect_shmem(int pid, VmaEntry *vi)
* will wait until the kernel propagate this mapping * will wait until the kernel propagate this mapping
* into /proc * into /proc
*/ */
if (!pid_rst_prio(pid, si->pid)) if (!pid_rst_prio(pid, si->pid)) {
if (si->pid == pid)
si->self_count++;
return 0; return 0;
}
si->pid = pid; si->pid = pid;
si->start = vi->start; si->start = vi->start;
si->end = vi->end; si->end = vi->end;
si->self_count = 1;
return 0; return 0;
} }
...@@ -85,6 +91,8 @@ int collect_shmem(int pid, VmaEntry *vi) ...@@ -85,6 +91,8 @@ int collect_shmem(int pid, VmaEntry *vi)
si->pid = pid; si->pid = pid;
si->size = size; si->size = size;
si->fd = -1; si->fd = -1;
si->count = 1;
si->self_count = 1;
nr_shmems++; nr_shmems++;
futex_init(&si->lock); futex_init(&si->lock);
...@@ -97,17 +105,18 @@ static int shmem_wait_and_open(int pid, struct shmem_info *si) ...@@ -97,17 +105,18 @@ static int shmem_wait_and_open(int pid, struct shmem_info *si)
char path[128]; char path[128];
int ret; int ret;
snprintf(path, sizeof(path), "/proc/%d/map_files/%lx-%lx", pr_info("Waiting for the %lx shmem to appear\n", si->shmid);
si->pid, si->start, si->end); futex_wait_while(&si->lock, 0);
pr_info("Waiting for [%s] to appear\n", path); snprintf(path, sizeof(path), "/proc/%d/fd/%d",
futex_wait_until(&si->lock, 1); si->pid, si->fd);
pr_info("Opening shmem [%s] \n", path); pr_info("Opening shmem [%s] \n", path);
ret = open_proc_rw(si->pid, "map_files/%lx-%lx", si->start, si->end); ret = open_proc_rw(si->pid, "fd/%d", si->fd);
if (ret < 0) if (ret < 0)
pr_perror(" %d: Can't stat shmem at %s", pr_perror(" %d: Can't stat shmem at %s",
si->pid, path); si->pid, path);
futex_inc_and_wake(&si->lock);
return ret; return ret;
} }
...@@ -207,10 +216,17 @@ int get_shmem_fd(int pid, VmaEntry *vi) ...@@ -207,10 +216,17 @@ int get_shmem_fd(int pid, VmaEntry *vi)
(unsigned long) addr, (unsigned long) addr,
(unsigned long) addr + si->size); (unsigned long) addr + si->size);
munmap(addr, si->size); munmap(addr, si->size);
if (f < 0)
return -1;
si->fd = f; si->fd = f;
/* Signal the slaves that they can now open the fd for this shmem */
futex_inc_and_wake(&si->lock);
/*
* All other regions in this process will duplicate
* the file descriptor, so we don't wait for them.
*/
futex_wait_until(&si->lock, si->count - si->self_count + 1);
return f; return f;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment