Commit b548ed94 authored by Andrei Vagin

criu: raise the task file limit for specific commands

We don't need to do this from early_init.

Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
parent 32d6e7b5
......@@ -1557,6 +1557,11 @@ int cr_pre_dump_tasks(pid_t pid)
struct pstree_item *item;
int ret = -1;
/*
* We might need a lot of pipes to fetch a huge number of pages to dump.
*/
rlimit_unlimit_nofile();
root_item = alloc_pstree_item();
if (!root_item)
goto err;
......@@ -1754,6 +1759,13 @@ int cr_dump_tasks(pid_t pid)
pr_info("Dumping processes (pid: %d)\n", pid);
pr_info("========================================\n");
/*
* We will fetch all file descriptors for each task, their number can
* be bigger than a default file limit, so we need to raise it to the
* maximum.
*/
rlimit_unlimit_nofile();
root_item = alloc_pstree_item();
if (!root_item)
goto err;
......
......@@ -2330,6 +2330,9 @@ int cr_restore_tasks(void)
{
int ret = -1;
if (init_service_fd())
return 1;
if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
return -1;
......
......@@ -20,9 +20,6 @@
#include <sys/utsname.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "int.h"
#include "page.h"
#include "common/compiler.h"
......@@ -50,79 +47,6 @@
#include "setproctitle.h"
#include "sysctl.h"
/*
 * Lift the open-files limit of the current process.
 *
 * Both the soft and the hard RLIMIT_NOFILE are set to kdat.sysctl_nr_open.
 * On failure the error is logged and service_fd_rlim_cur is left untouched;
 * on success service_fd_rlim_cur is updated to the new limit.
 */
static void rlimit_unlimit_nofile(void)
{
	struct rlimit nofile = {
		.rlim_cur = kdat.sysctl_nr_open,
		.rlim_max = kdat.sysctl_nr_open,
	};

	if (prlimit(getpid(), RLIMIT_NOFILE, &nofile, NULL) != 0) {
		pr_perror("rlimit: Can't setup RLIMIT_NOFILE for self");
		return;
	}

	pr_debug("rlimit: RLIMIT_NOFILE unlimited for self\n");

	/* Record the new limit only once the kernel has accepted it. */
	service_fd_rlim_cur = kdat.sysctl_nr_open;
}
/*
 * Early, per-command initialization: optionally raise the open-files limit,
 * then set up the service fd engine.
 *
 * The service fd engine implies that the descriptors it uses won't be
 * borrowed by the rest of the code, and the default limit of 1024 is not
 * enough for highly loaded tests/containers. So the system-level limit is
 * fetched through the kdat engine and our own limits are lifted to it.
 *
 * This has to happen before the service fds are initialized. Failures to
 * raise the limit are not reported as errors here: in the worst case, where
 * an fd clash happens, we exit with an explicit error during the real
 * action stage.
 *
 * Raising the limit makes the kernel fdtable bloat, so it is done only for
 * the commands listed in @nofile_cmds:
 *
 *  - dump: criu needs additional files for service fds, so if the container
 *    already has many files opened we need at least as much room when
 *    fetching fds from a target process;
 *
 *  - pre-dump: we might need a lot of pipes to fetch a huge number of pages
 *    to dump;
 *
 *  - restore: there is no guarantee that the fd limit had not already been
 *    hit at dump time;
 *
 *  - swrk and service: requests arrive on the fly, so we don't know whether
 *    one of the above will be among them.
 *
 * Returns 0 on success, 1 if init_service_fd() fails.
 */
static int early_init(const char *cmd)
{
	static const char *nofile_cmds[] = {
		"swrk", "service", "dump", "pre-dump", "restore",
	};
	bool wants_nofile = false;
	size_t idx;

	/* Stop scanning at the first command name that matches. */
	for (idx = 0; idx < ARRAY_SIZE(nofile_cmds) && !wants_nofile; idx++)
		wants_nofile = (strcmp(nofile_cmds[idx], cmd) == 0);

	/* Only lift the limit when the kernel file statistics were obtained. */
	if (wants_nofile && kerndat_files_stat(true) == 0)
		rlimit_unlimit_nofile();

	return init_service_fd() ? 1 : 0;
}
int main(int argc, char *argv[], char *envp[])
{
int ret = -1;
......@@ -158,9 +82,6 @@ int main(int argc, char *argv[], char *envp[])
log_set_loglevel(opts.log_level);
if (early_init(argv[optind]))
return -1;
if (!strcmp(argv[1], "swrk")) {
if (argc < 3)
goto usage;
......
......@@ -350,6 +350,8 @@ extern int epoll_del_rfd(int epfd, struct epoll_rfd *rfd);
extern int epoll_run_rfds(int epfd, struct epoll_event *evs, int nr_fds, int tmo);
extern int epoll_prepare(int nr_events, struct epoll_event **evs);
extern void rlimit_unlimit_nofile(void);
extern int call_in_child_process(int (*fn)(void *), void *arg);
#ifdef __GLIBC__
extern void print_stack_trace(pid_t pid);
......
......@@ -67,6 +67,20 @@ int init_service_fd(void)
{
struct rlimit64 rlimit;
/*
* Service fd engine implies that file descriptors used won't be
* borrowed by the rest of the code and default 1024 limit is not
* enough for high loaded test/containers. Thus use kdat engine to
* fetch current system level limit for numbers of files allowed to
* open up and lift up own limits.
*
* Note we have to do it before the service fds get initialized, and we
* don't exit with errors here because in the worst scenario, where a clash
* of fds happens, we simply exit with an explicit error during the real
* action stage.
*/
rlimit_unlimit_nofile();
/*
* Service FDs are those that most likely won't
* conflict with any 'real-life' ones
......
......@@ -20,12 +20,15 @@
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sched.h>
#include <ctype.h>
#include "kerndat.h"
#include "page.h"
#include "util.h"
#include "image.h"
......@@ -1348,6 +1351,23 @@ out:
return ret;
}
/*
 * Raise RLIMIT_NOFILE for the calling process.
 *
 * The soft and hard limits are both set to kdat.sysctl_nr_open. If
 * prlimit() fails, the error is logged and service_fd_rlim_cur keeps its
 * previous value; otherwise service_fd_rlim_cur is set to the new limit.
 */
void rlimit_unlimit_nofile(void)
{
	struct rlimit nofile = {
		.rlim_cur = kdat.sysctl_nr_open,
		.rlim_max = kdat.sysctl_nr_open,
	};

	if (prlimit(getpid(), RLIMIT_NOFILE, &nofile, NULL) != 0) {
		pr_perror("rlimit: Can't setup RLIMIT_NOFILE for self");
		return;
	}

	pr_debug("rlimit: RLIMIT_NOFILE unlimited for self\n");

	/* Publish the new limit only after it has actually been applied. */
	service_fd_rlim_cur = kdat.sysctl_nr_open;
}
#ifdef __GLIBC__
#include <execinfo.h>
void print_stack_trace(pid_t pid)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment