Commit d2d3b6ff, authored by Cyrill Gorcunov, committed by Andrei Vagin

sfd: Move service fds to separate file

It has nothing to do with utils but
rather a separate service engine.
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
parent 7905b9f6
...@@ -84,6 +84,7 @@ obj-y += autofs.o ...@@ -84,6 +84,7 @@ obj-y += autofs.o
obj-y += fdstore.o obj-y += fdstore.o
obj-y += uffd.o obj-y += uffd.o
obj-y += config.o obj-y += config.o
obj-y += servicefd.o
ifeq ($(VDSO),y) ifeq ($(VDSO),y)
obj-y += pie-util-vdso.o obj-y += pie-util-vdso.o
......
...@@ -8,17 +8,17 @@ enum sfd_type { ...@@ -8,17 +8,17 @@ enum sfd_type {
LOG_FD_OFF, LOG_FD_OFF,
IMG_FD_OFF, IMG_FD_OFF,
PROC_FD_OFF, /* fd with /proc for all proc_ calls */ PROC_FD_OFF, /* fd with /proc for all proc_ calls */
PROC_PID_FD_OFF, PROC_PID_FD_OFF,
CR_PROC_FD_OFF, /* some other's proc fd. CR_PROC_FD_OFF, /* some other's proc fd:
* For dump -- target ns' proc * - For dump -- target ns' proc
* For restore -- CRIU ns' proc * - For restore -- CRIU ns' proc
*/ */
ROOT_FD_OFF, /* Root of the namespace we dump/restore */ ROOT_FD_OFF, /* Root of the namespace we dump/restore */
CGROUP_YARD, CGROUP_YARD,
USERNSD_SK, /* Socket for usernsd */ USERNSD_SK, /* Socket for usernsd */
NS_FD_OFF, /* Node's net namespace fd */ NS_FD_OFF, /* Node's net namespace fd */
TRANSPORT_FD_OFF, /* to transfer file descriptors */ TRANSPORT_FD_OFF, /* to transfer file descriptors */
RPC_SK_OFF, RPC_SK_OFF,
FDSTORE_SK_OFF, FDSTORE_SK_OFF,
...@@ -28,14 +28,13 @@ enum sfd_type { ...@@ -28,14 +28,13 @@ enum sfd_type {
struct pstree_item; struct pstree_item;
extern bool sfds_protected; extern bool sfds_protected;
extern void set_proc_self_fd(int fd);
extern int clone_service_fd(struct pstree_item *me);
extern int init_service_fd(void); extern int init_service_fd(void);
extern int get_service_fd(enum sfd_type type); extern int get_service_fd(enum sfd_type type);
extern bool is_any_service_fd(int fd);
extern bool is_service_fd(int fd, enum sfd_type type);
extern int service_fd_min_fd(struct pstree_item *item);
extern int install_service_fd(enum sfd_type type, int fd); extern int install_service_fd(enum sfd_type type, int fd);
extern int close_service_fd(enum sfd_type type); extern int close_service_fd(enum sfd_type type);
extern bool is_service_fd(int fd, enum sfd_type type); extern int clone_service_fd(struct pstree_item *me);
extern bool is_any_service_fd(int fd);
extern int service_fd_min_fd(struct pstree_item *);
#endif /* __CR_SERVICE_FD_H__ */ #endif /* __CR_SERVICE_FD_H__ */
...@@ -180,6 +180,8 @@ extern int cr_daemon(int nochdir, int noclose, int *keep_fd, int close_fd); ...@@ -180,6 +180,8 @@ extern int cr_daemon(int nochdir, int noclose, int *keep_fd, int close_fd);
extern int close_status_fd(void); extern int close_status_fd(void);
extern int is_root_user(void); extern int is_root_user(void);
extern void set_proc_self_fd(int fd);
static inline bool dir_dots(const struct dirent *de) static inline bool dir_dots(const struct dirent *de)
{ {
return !strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."); return !strcmp(de->d_name, ".") || !strcmp(de->d_name, "..");
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "common/compiler.h" #include "common/compiler.h"
#include "xmalloc.h" #include "xmalloc.h"
#include "plugin.h" #include "plugin.h"
#include "servicefd.h"
#include "common/list.h" #include "common/list.h"
#include "log.h" #include "log.h"
...@@ -90,6 +91,11 @@ static int verify_plugin(cr_plugin_desc_t *d) ...@@ -90,6 +91,11 @@ static int verify_plugin(cr_plugin_desc_t *d)
return 0; return 0;
} }
/*
 * Report the descriptor that get_service_fd() has for IMG_FD_OFF.
 * The value is passed through untouched, including the -1 that
 * get_service_fd() gives for a type which is not installed.
 */
int criu_get_image_dir(void)
{
	int img_fd = get_service_fd(IMG_FD_OFF);

	return img_fd;
}
static int cr_lib_load(int stage, char *path) static int cr_lib_load(int stage, char *path)
{ {
cr_plugin_desc_t *d; cr_plugin_desc_t *d;
......
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <sys/resource.h>
#include "common/compiler.h"
#include "common/list.h"
#include "criu-log.h"
#include "util.h"
#include "bitops.h"
#include "pstree.h"
#include "files.h"
#include "rst_info.h"
#include "servicefd.h"
#undef LOG_PREFIX
#define LOG_PREFIX "sfd: "
/*
 * Max potentially possible fd to be open by criu process
 * (filled from RLIMIT_NOFILE by init_service_fd())
 */
int service_fd_rlim_cur;
/* Base of current process service fds set */
static int service_fd_base;
/* Id of current process in shared fdt */
static int service_fd_id = 0;
/* Bit N is set while the service fd of type N is installed */
static DECLARE_BITMAP(sfd_map, SERVICE_FD_MAX);
/* Per-type descriptor numbers recorded while service_fd_base is still zero */
static int sfd_arr[SERVICE_FD_MAX];
/*
 * Flag marking areas of code where modifying service fds is
 * prohibited. It protects their numbers from being reused
 * by ordinary files. See install_service_fd() and close_service_fd().
 */
bool sfds_protected = false;
int init_service_fd(void)
{
struct rlimit64 rlimit;
/*
* Service FDs are those that most likely won't
* conflict with any 'real-life' ones
*/
if (syscall(__NR_prlimit64, getpid(), RLIMIT_NOFILE, NULL, &rlimit)) {
pr_perror("Can't get rlimit");
return -1;
}
service_fd_rlim_cur = (int)rlimit.rlim_cur;
return 0;
}
static int __get_service_fd(enum sfd_type type, int service_fd_id)
{
return service_fd_base - type - SERVICE_FD_MAX * service_fd_id;
}
/*
 * Look up the descriptor currently backing service fd @type.
 *
 * Returns -1 when @type is not marked in sfd_map. While service_fd_base
 * is zero the number remembered in sfd_arr is returned, otherwise the
 * number is derived from the base and the current service_fd_id.
 */
int get_service_fd(enum sfd_type type)
{
	int idx = (int)type;

	BUG_ON(idx <= SERVICE_FD_MIN || idx >= SERVICE_FD_MAX);

	if (!test_bit(type, sfd_map))
		return -1;

	return service_fd_base ? __get_service_fd(type, service_fd_id) : sfd_arr[type];
}
bool is_any_service_fd(int fd)
{
int sfd_min_fd = __get_service_fd(SERVICE_FD_MAX, service_fd_id);
int sfd_max_fd = __get_service_fd(SERVICE_FD_MIN, service_fd_id);
if (fd > sfd_min_fd && fd < sfd_max_fd) {
int type = SERVICE_FD_MAX - (fd - sfd_min_fd);
if (type > SERVICE_FD_MIN && type < SERVICE_FD_MAX)
return !!test_bit(type, sfd_map);
}
return false;
}
/*
 * Check whether @fd is the descriptor get_service_fd() reports for
 * @type. Note that get_service_fd() yields -1 for a type which is
 * not installed, so @fd == -1 compares equal in that case.
 */
bool is_service_fd(int fd, enum sfd_type type)
{
	int sfd = get_service_fd(type);

	return sfd == fd;
}
/*
 * Compute service_fd_rlim_cur - (SERVICE_FD_MAX - 1) - SERVICE_FD_MAX * id,
 * where id is fdt->nr - 1 if @item has an fdt attached and 0 otherwise.
 */
int service_fd_min_fd(struct pstree_item *item)
{
	struct fdt *fdt = rsti(item)->fdt;
	int last_id = fdt ? fdt->nr - 1 : 0;

	return service_fd_rlim_cur - (SERVICE_FD_MAX - 1) - SERVICE_FD_MAX * last_id;
}
/*
 * Report an attempt to modify service fd @type while sfds_protected
 * is set: log the error, print a stack trace and hit BUG().
 */
static void sfds_protection_bug(enum sfd_type type)
{
	int pid = 0;

	pr_err("Service fd %u is being modified in protected context\n", type);

	/* Without a current task the trace is requested for pid 0 */
	if (current)
		pid = vpid(current);
	print_stack_trace(pid);

	BUG();
}
/*
 * Make @fd the service fd of kind @type.
 *
 * While service_fd_base is zero, @fd itself is remembered in sfd_arr
 * (closing a previously remembered one) and returned. Otherwise @fd is
 * duplicated with O_CLOEXEC onto the number computed for @type and
 * then closed, whether or not the duplication succeeded.
 *
 * Returns the descriptor now serving @type, or -1 if dup3() failed.
 */
int install_service_fd(enum sfd_type type, int fd)
{
	int target, dupped;

	BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);

	/* In protected context only already installed types may be replaced */
	if (sfds_protected && !test_bit(type, sfd_map))
		sfds_protection_bug(type);

	if (!service_fd_base) {
		if (test_bit(type, sfd_map))
			close(sfd_arr[type]);
		set_bit(type, sfd_map);
		sfd_arr[type] = fd;
		return fd;
	}

	target = __get_service_fd(type, service_fd_id);
	dupped = dup3(fd, target, O_CLOEXEC);
	if (dupped != target)
		pr_perror("Dup %d -> %d failed", fd, target);
	else
		set_bit(type, sfd_map);

	/* The source descriptor is consumed on both outcomes */
	close(fd);
	return dupped == target ? target : -1;
}
/*
 * Close service fd @type and clear its bit in sfd_map.
 *
 * Calling this while sfds_protected is set is reported through
 * sfds_protection_bug(). A type which is not installed is not an error.
 *
 * Returns 0 on success, -1 if close_safe() failed.
 */
int close_service_fd(enum sfd_type type)
{
	int fd;

	if (sfds_protected)
		sfds_protection_bug(type);

	fd = get_service_fd(type);
	if (fd >= 0) {
		if (close_safe(&fd) != 0)
			return -1;
		clear_bit(type, sfd_map);
	}

	return 0;
}
/*
 * Duplicate service fd @type of the current process onto the number it
 * gets in the set described by @new_base and @new_id.
 *
 * Nothing happens if @type is not installed or if it already sits on
 * the destination number. The old descriptor is closed after a
 * successful dup2() unless @me was created with CLONE_FILES.
 *
 * Failures are only logged; there is no return value.
 */
static void move_service_fd(struct pstree_item *me, int type, int new_id, int new_base)
{
	int old = get_service_fd(type);
	int new = new_base - type - SERVICE_FD_MAX * new_id;
	int ret;

	if (old < 0)
		return;

	/*
	 * dup2() does nothing when both numbers are equal, so the close()
	 * below would destroy the very descriptor that has to survive.
	 */
	if (old == new)
		return;

	ret = dup2(old, new);
	if (ret == -1) {
		/*
		 * EBADF is deliberately not reported.
		 * NOTE(review): presumably @old may be gone already when the
		 * fd table is shared -- confirm against callers.
		 */
		if (errno != EBADF)
			pr_perror("Unable to clone %d->%d", old, new);
	} else if (!(rsti(me)->clone_flags & CLONE_FILES))
		close(old);
}
/*
 * Pick the base number for the service fds set of task @me.
 *
 * A task that has an fdt attached and a non-zero service_fd_id keeps
 * the current service_fd_base. Otherwise the base is computed from the
 * highest fd the task is expected to use, plus room for fdt_nr sets of
 * service fds and a pad, rounded up as described below.
 *
 * Returns the chosen base, or -1 if even the value aligned down from
 * service_fd_rlim_cur is smaller than the required number.
 */
static int choose_service_fd_base(struct pstree_item *me)
{
int nr, real_nr, fdt_nr = 1, id = rsti(me)->service_fd_id;
if (rsti(me)->fdt) {
/* The base is set by owner of fdt (id 0) */
if (id != 0)
return service_fd_base;
fdt_nr = rsti(me)->fdt->nr;
}
/*
 * Now find process's max used fd number: the fd of the last entry
 * on the task's fds list, or -1 for an empty list.
 * NOTE(review): presumably the list is kept sorted by fd -- confirm.
 */
if (!list_empty(&rsti(me)->fds))
nr = list_entry(rsti(me)->fds.prev,
struct fdinfo_list_entry, ps_list)->fe->fd;
else
nr = -1;
/* inh_fd_max is defined outside this function; it only raises the floor */
nr = max(nr, inh_fd_max);
/*
* Service fds go after max fd near right border of alignment:
*
* ...|max_fd|max_fd+1|...|sfd first|...|sfd last (aligned)|
*
* So, they take maximum numbers of area allocated by kernel.
* See linux alloc_fdtable() for details.
*/
nr += (SERVICE_FD_MAX - SERVICE_FD_MIN) * fdt_nr;
nr += 16; /* Safety pad */
/* Remember the unaligned requirement for the rlimit check below */
real_nr = nr;
/* Convert to units of (1024 / sizeof(void *)) descriptors */
nr /= (1024 / sizeof(void *));
/* Take the power of two strictly above the unit count (1 -> 2, 2 -> 4, ...) */
if (nr)
nr = 1 << (32 - __builtin_clz(nr));
else
nr = 1;
/* ... and convert back to a descriptor number */
nr *= (1024 / sizeof(void *));
if (nr > service_fd_rlim_cur) {
/* Right border is bigger than rlim. OK, then just the aligned value is enough */
nr = round_down(service_fd_rlim_cur, (1024 / sizeof(void *)));
if (nr < real_nr) {
pr_err("Can't chose service_fd_base: %d %d\n", nr, real_nr);
return -1;
}
}
return nr;
}
/*
 * Move every installed service fd of the current process into the set
 * chosen for task @me, then switch service_fd_base and service_fd_id
 * over to that set.
 *
 * Returns 0 on success (including when nothing has to move), -1 if no
 * base could be chosen.
 */
int clone_service_fd(struct pstree_item *me)
{
	int new_base = choose_service_fd_base(me);
	int new_id = rsti(me)->service_fd_id;
	int type;

	if (new_base == -1)
		return -1;

	if (new_base == service_fd_base && new_id == service_fd_id)
		return 0;

	/* Dup sfds in memmove() style: they may overlap */
	if (get_service_fd(LOG_FD_OFF) < new_base - LOG_FD_OFF - SERVICE_FD_MAX * new_id) {
		for (type = SERVICE_FD_MIN + 1; type < SERVICE_FD_MAX; type++)
			move_service_fd(me, type, new_id, new_base);
	} else {
		for (type = SERVICE_FD_MAX - 1; type > SERVICE_FD_MIN; type--)
			move_service_fd(me, type, new_id, new_base);
	}

	service_fd_base = new_base;
	service_fd_id = new_id;
	return 0;
}
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <stdbool.h> #include <stdbool.h>
#include <limits.h>
#include <signal.h> #include <signal.h>
#include <unistd.h> #include <unistd.h>
#include <dirent.h> #include <dirent.h>
...@@ -17,8 +16,6 @@ ...@@ -17,8 +16,6 @@
#include <sys/param.h> #include <sys/param.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/ptrace.h> #include <sys/ptrace.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/wait.h> #include <sys/wait.h>
...@@ -29,12 +26,8 @@ ...@@ -29,12 +26,8 @@
#include <sched.h> #include <sched.h>
#include <ctype.h> #include <ctype.h>
#include "bitops.h"
#include "page.h" #include "page.h"
#include "common/compiler.h"
#include "common/list.h"
#include "util.h" #include "util.h"
#include "rst-malloc.h"
#include "image.h" #include "image.h"
#include "vma.h" #include "vma.h"
#include "mem.h" #include "mem.h"
...@@ -43,7 +36,6 @@ ...@@ -43,7 +36,6 @@
#include "clone-noasan.h" #include "clone-noasan.h"
#include "cr_options.h" #include "cr_options.h"
#include "servicefd.h"
#include "cr-service.h" #include "cr-service.h"
#include "files.h" #include "files.h"
#include "pstree.h" #include "pstree.h"
...@@ -422,237 +414,6 @@ int do_open_proc(pid_t pid, int flags, const char *fmt, ...) ...@@ -422,237 +414,6 @@ int do_open_proc(pid_t pid, int flags, const char *fmt, ...)
return openat(dirfd, path, flags); return openat(dirfd, path, flags);
} }
/* Max potentially possible fd to be open by criu process */
int service_fd_rlim_cur;
/* Base of current process service fds set */
static int service_fd_base;
/* Id of current process in shared fdt */
static int service_fd_id = 0;
int init_service_fd(void)
{
struct rlimit64 rlimit;
/*
* Service FDs are those that most likely won't
* conflict with any 'real-life' ones
*/
if (syscall(__NR_prlimit64, getpid(), RLIMIT_NOFILE, NULL, &rlimit)) {
pr_perror("Can't get rlimit");
return -1;
}
service_fd_rlim_cur = (int)rlimit.rlim_cur;
return 0;
}
static int __get_service_fd(enum sfd_type type, int service_fd_id)
{
return service_fd_base - type - SERVICE_FD_MAX * service_fd_id;
}
/*
 * Compute service_fd_rlim_cur - (SERVICE_FD_MAX - 1) - SERVICE_FD_MAX * id,
 * where id is fdt->nr - 1 if @item has an fdt attached and 0 otherwise.
 */
int service_fd_min_fd(struct pstree_item *item)
{
	struct fdt *fdt = rsti(item)->fdt;
	int last_id = fdt ? fdt->nr - 1 : 0;

	return service_fd_rlim_cur - (SERVICE_FD_MAX - 1) - SERVICE_FD_MAX * last_id;
}
/* Bit N is set while the service fd of type N is installed */
static DECLARE_BITMAP(sfd_map, SERVICE_FD_MAX);
/* Per-type descriptor numbers recorded while service_fd_base is still zero */
static int sfd_arr[SERVICE_FD_MAX];
/*
 * Flag marking areas of code where modifying service fds is
 * prohibited. It protects their numbers from being reused
 * by ordinary files. See install_service_fd() and close_service_fd().
 */
bool sfds_protected = false;
/*
 * Report an attempt to modify service fd @type while sfds_protected
 * is set: log the error, print a stack trace and hit BUG().
 */
static void sfds_protection_bug(enum sfd_type type)
{
	int pid = 0;

	pr_err("Service fd %u is being modified in protected context\n", type);

	/* Without a current task the trace is requested for pid 0 */
	if (current)
		pid = vpid(current);
	print_stack_trace(pid);

	BUG();
}
/*
 * Make @fd the service fd of kind @type.
 *
 * While service_fd_base is zero, @fd itself is remembered in sfd_arr
 * (closing a previously remembered one) and returned. Otherwise @fd is
 * duplicated with O_CLOEXEC onto the number computed for @type and
 * then closed, whether or not the duplication succeeded.
 *
 * Returns the descriptor now serving @type, or -1 if dup3() failed.
 */
int install_service_fd(enum sfd_type type, int fd)
{
	int target, dupped;

	BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);

	/* In protected context only already installed types may be replaced */
	if (sfds_protected && !test_bit(type, sfd_map))
		sfds_protection_bug(type);

	if (!service_fd_base) {
		if (test_bit(type, sfd_map))
			close(sfd_arr[type]);
		set_bit(type, sfd_map);
		sfd_arr[type] = fd;
		return fd;
	}

	target = __get_service_fd(type, service_fd_id);
	dupped = dup3(fd, target, O_CLOEXEC);
	if (dupped != target)
		pr_perror("Dup %d -> %d failed", fd, target);
	else
		set_bit(type, sfd_map);

	/* The source descriptor is consumed on both outcomes */
	close(fd);
	return dupped == target ? target : -1;
}
/*
 * Look up the descriptor currently backing service fd @type.
 *
 * Returns -1 when @type is not marked in sfd_map. While service_fd_base
 * is zero the number remembered in sfd_arr is returned, otherwise the
 * number is derived from the base and the current service_fd_id.
 */
int get_service_fd(enum sfd_type type)
{
	int idx = (int)type;

	BUG_ON(idx <= SERVICE_FD_MIN || idx >= SERVICE_FD_MAX);

	if (!test_bit(type, sfd_map))
		return -1;

	return service_fd_base ? __get_service_fd(type, service_fd_id) : sfd_arr[type];
}
/*
 * Report the descriptor that get_service_fd() has for IMG_FD_OFF.
 * The value is passed through untouched, including the -1 that
 * get_service_fd() gives for a type which is not installed.
 */
int criu_get_image_dir(void)
{
	int img_fd = get_service_fd(IMG_FD_OFF);

	return img_fd;
}
/*
 * Close service fd @type and clear its bit in sfd_map.
 *
 * Calling this while sfds_protected is set is reported through
 * sfds_protection_bug(). A type which is not installed is not an error.
 *
 * Returns 0 on success, -1 if close_safe() failed.
 */
int close_service_fd(enum sfd_type type)
{
	int fd;

	if (sfds_protected)
		sfds_protection_bug(type);

	fd = get_service_fd(type);
	if (fd >= 0) {
		if (close_safe(&fd) != 0)
			return -1;
		clear_bit(type, sfd_map);
	}

	return 0;
}
/*
 * Duplicate service fd @type of the current process onto the number it
 * gets in the set described by @new_base and @new_id.
 *
 * Nothing happens if @type is not installed or if it already sits on
 * the destination number. The old descriptor is closed after a
 * successful dup2() unless @me was created with CLONE_FILES.
 *
 * Failures are only logged; there is no return value.
 */
static void move_service_fd(struct pstree_item *me, int type, int new_id, int new_base)
{
	int old = get_service_fd(type);
	int new = new_base - type - SERVICE_FD_MAX * new_id;
	int ret;

	if (old < 0)
		return;

	/*
	 * dup2() does nothing when both numbers are equal, so the close()
	 * below would destroy the very descriptor that has to survive.
	 */
	if (old == new)
		return;

	ret = dup2(old, new);
	if (ret == -1) {
		/*
		 * EBADF is deliberately not reported.
		 * NOTE(review): presumably @old may be gone already when the
		 * fd table is shared -- confirm against callers.
		 */
		if (errno != EBADF)
			pr_perror("Unable to clone %d->%d", old, new);
	} else if (!(rsti(me)->clone_flags & CLONE_FILES))
		close(old);
}
/*
 * Pick the base number for the service fds set of task @me.
 *
 * A task that has an fdt attached and a non-zero service_fd_id keeps
 * the current service_fd_base. Otherwise the base is computed from the
 * highest fd the task is expected to use, plus room for fdt_nr sets of
 * service fds and a pad, rounded up as described below.
 *
 * Returns the chosen base, or -1 if even the value aligned down from
 * service_fd_rlim_cur is smaller than the required number.
 */
static int choose_service_fd_base(struct pstree_item *me)
{
int nr, real_nr, fdt_nr = 1, id = rsti(me)->service_fd_id;
if (rsti(me)->fdt) {
/* The base is set by owner of fdt (id 0) */
if (id != 0)
return service_fd_base;
fdt_nr = rsti(me)->fdt->nr;
}
/*
 * Now find process's max used fd number: the fd of the last entry
 * on the task's fds list, or -1 for an empty list.
 * NOTE(review): presumably the list is kept sorted by fd -- confirm.
 */
if (!list_empty(&rsti(me)->fds))
nr = list_entry(rsti(me)->fds.prev,
struct fdinfo_list_entry, ps_list)->fe->fd;
else
nr = -1;
/* inh_fd_max is defined outside this function; it only raises the floor */
nr = max(nr, inh_fd_max);
/*
* Service fds go after max fd near right border of alignment:
*
* ...|max_fd|max_fd+1|...|sfd first|...|sfd last (aligned)|
*
* So, they take maximum numbers of area allocated by kernel.
* See linux alloc_fdtable() for details.
*/
nr += (SERVICE_FD_MAX - SERVICE_FD_MIN) * fdt_nr;
nr += 16; /* Safety pad */
/* Remember the unaligned requirement for the rlimit check below */
real_nr = nr;
/* Convert to units of (1024 / sizeof(void *)) descriptors */
nr /= (1024 / sizeof(void *));
/* Take the power of two strictly above the unit count (1 -> 2, 2 -> 4, ...) */
if (nr)
nr = 1 << (32 - __builtin_clz(nr));
else
nr = 1;
/* ... and convert back to a descriptor number */
nr *= (1024 / sizeof(void *));
if (nr > service_fd_rlim_cur) {
/* Right border is bigger than rlim. OK, then just the aligned value is enough */
nr = round_down(service_fd_rlim_cur, (1024 / sizeof(void *)));
if (nr < real_nr) {
pr_err("Can't chose service_fd_base: %d %d\n", nr, real_nr);
return -1;
}
}
return nr;
}
/*
 * Move every installed service fd of the current process into the set
 * chosen for task @me, then switch service_fd_base and service_fd_id
 * over to that set.
 *
 * Returns 0 on success (including when nothing has to move), -1 if no
 * base could be chosen.
 */
int clone_service_fd(struct pstree_item *me)
{
	int new_base = choose_service_fd_base(me);
	int new_id = rsti(me)->service_fd_id;
	int type;

	if (new_base == -1)
		return -1;

	if (new_base == service_fd_base && new_id == service_fd_id)
		return 0;

	/* Dup sfds in memmove() style: they may overlap */
	if (get_service_fd(LOG_FD_OFF) < new_base - LOG_FD_OFF - SERVICE_FD_MAX * new_id) {
		for (type = SERVICE_FD_MIN + 1; type < SERVICE_FD_MAX; type++)
			move_service_fd(me, type, new_id, new_base);
	} else {
		for (type = SERVICE_FD_MAX - 1; type > SERVICE_FD_MIN; type--)
			move_service_fd(me, type, new_id, new_base);
	}

	service_fd_base = new_base;
	service_fd_id = new_id;
	return 0;
}
bool is_any_service_fd(int fd)
{
int sfd_min_fd = __get_service_fd(SERVICE_FD_MAX, service_fd_id);
int sfd_max_fd = __get_service_fd(SERVICE_FD_MIN, service_fd_id);
if (fd > sfd_min_fd && fd < sfd_max_fd) {
int type = SERVICE_FD_MAX - (fd - sfd_min_fd);
if (type > SERVICE_FD_MIN && type < SERVICE_FD_MAX)
return !!test_bit(type, sfd_map);
}
return false;
}
/*
 * Check whether @fd is the descriptor get_service_fd() reports for
 * @type. Note that get_service_fd() yields -1 for a type which is
 * not installed, so @fd == -1 compares equal in that case.
 */
bool is_service_fd(int fd, enum sfd_type type)
{
	int sfd = get_service_fd(type);

	return sfd == fd;
}
int copy_file(int fd_in, int fd_out, size_t bytes) int copy_file(int fd_in, int fd_out, size_t bytes)
{ {
ssize_t written = 0; ssize_t written = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment