Commit 3c7d01f6 authored by Pavel Emelyanov's avatar Pavel Emelyanov

net: Pre-create nl diag sk

The setns() syscall (called by switch_ns()) can be extremely
slow. If we call it two or more times from the same task the
kernel will synchronously go on a very slow routine called
synchronize_rcu() trying to put a reference on old namespaces.

To avoid doing this more than once I propose to create all
per-ns sockets in one place with one setns call. In this
patch only the nl diag socket, which is used to collect other
sockets, is created this way.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 4f9acb6a
#include <unistd.h>
#include <linux/netlink.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/eventfd.h>
......@@ -7,6 +8,7 @@
#include <sys/signalfd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <signal.h>
#include <linux/if.h>
......@@ -91,11 +93,23 @@ static int check_map_files(void)
return -1;
}
/* Fall back to NETLINK_INET_DIAG when the included headers do not define NETLINK_SOCK_DIAG. */
#ifndef NETLINK_SOCK_DIAG
#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG
#endif
static int check_sock_diag(void)
{
int ret;
struct ns_id ns;
ns.pid = 0;
ns.net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
if (ns.net.nlsk < 0) {
pr_perror("Can't make diag socket for check");
return -1;
}
ret = collect_sockets(0);
ret = collect_sockets(&ns);
if (!ret)
return 0;
......
......@@ -21,6 +21,11 @@ struct ns_id {
struct mount_info *mntinfo_list;
struct mount_info *mntinfo_tree;
} mnt;
struct {
int nlsk; /* for sockets collection */
int seqsk; /* to talk to parasite daemons */
} net;
};
};
extern struct ns_id *ns_ids;
......
......@@ -32,7 +32,8 @@ extern int restore_prepare_socket(int sk);
extern bool socket_test_collect_bit(unsigned int family, unsigned int proto);
extern int sk_collect_one(int ino, int family, struct socket_desc *d);
extern int collect_sockets(int pid);
struct ns_id;
extern int collect_sockets(struct ns_id *);
extern int collect_inet_sockets(void);
extern struct collect_image_info unix_sk_cinfo;
extern int collect_unix_sockets(void);
......
......@@ -626,10 +626,58 @@ int veth_pair_add(char *in, char *out)
return 0;
}
/*
 * The setns() syscall (called by switch_ns()) can be extremely
 * slow. If we call it two or more times from the same task the
 * kernel will synchronously go on a very slow routine called
 * synchronize_rcu() trying to put a reference on old namespaces.
 *
 * To avoid doing this more than once we pre-create all the
 * needed other-ns sockets in advance.
 *
 * Creates the sock diag netlink socket for @ns and stores it in
 * ns->net.nlsk. When ns->pid is not the calling process, the
 * socket is created between a switch_ns() to the net namespace
 * of ns->pid and the matching restore_ns().
 *
 * Returns 0 on success and a negative value on failure. If the
 * socket cannot be created, or the original namespace cannot be
 * restored afterwards, ns->net.nlsk is negative on return.
 */
static int prep_ns_sockets(struct ns_id *ns)
{
	int nsret = -1, ret;

	if (ns->pid != getpid()) {
		pr_info("Switching to %d's net for collecting sockets\n", ns->pid);
		if (switch_ns(ns->pid, &net_ns_desc, &nsret))
			return -1;
	}

	ret = ns->net.nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	if (ret < 0) {
		pr_perror("Can't create sock diag socket");
		goto err_nl;
	}

	ret = 0;
out:
	if (nsret >= 0 && restore_ns(nsret, &net_ns_desc) < 0) {
		/* Mark the namespace as handled so the error path below does not retry. */
		nsret = -1;
		if (ret == 0)
			goto err_ret;
	}

	return ret;

err_ret:
	/*
	 * The socket was created but the namespace could not be restored.
	 * Drop the socket, invalidate the stored descriptor and report the
	 * failure; without this the caller would see success and go on to
	 * use a closed descriptor.
	 */
	close(ns->net.nlsk);
	ns->net.nlsk = -1;
	ret = -1;
err_nl:
	goto out;
}
/*
 * Collect the sockets of one net namespace, logging the namespace
 * id and pid first.
 *
 * NOTE(review): this span appears to be rendered from a diff with the
 * +/- markers stripped, so it shows both the old body (returning
 * collect_sockets(ns->pid) directly) and the new one (prep_ns_sockets()
 * followed by collect_sockets(ns)). Confirm against the real file which
 * lines are live before relying on this text.
 */
static int collect_net_ns(struct ns_id *ns)
{
int ret;
pr_info("Collecting netns %d/%d\n", ns->id, ns->pid);
return collect_sockets(ns->pid);
ret = prep_ns_sockets(ns);
if (ret)
return ret;
return collect_sockets(ns);
}
int collect_net_namespaces(void)
......
......@@ -515,27 +515,12 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
return tmp;
}
int collect_sockets(int pid)
int collect_sockets(struct ns_id *ns)
{
int err = 0, tmp;
int rst = -1;
int nl;
int nl = ns->net.nlsk;
struct sock_diag_req req;
if (root_ns_mask & CLONE_NEWNET) {
pr_info("Switching to %d's net for collecting sockets\n", pid);
if (switch_ns(pid, &net_ns_desc, &rst))
return -1;
}
nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
if (nl < 0) {
pr_perror("Can't create sock diag socket");
err = -1;
goto out;
}
memset(&req, 0, sizeof(req));
req.hdr.nlmsg_len = sizeof(req);
req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
......@@ -615,7 +600,7 @@ int collect_sockets(int pid)
tmp = do_collect_req(nl, &req, sizeof(req), packet_receive_one, NULL);
if (tmp) {
pr_warn("The current kernel doesn't support packet_diag\n");
if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
err = tmp;
}
......@@ -625,16 +610,15 @@ int collect_sockets(int pid)
tmp = do_collect_req(nl, &req, sizeof(req), netlink_receive_one, NULL);
if (tmp) {
pr_warn("The current kernel doesn't support netlink_diag\n");
if (pid == 0 || tmp != -ENOENT) /* Fedora 19 */
if (ns->pid == 0 || tmp != -ENOENT) /* Fedora 19 */
err = tmp;
}
/* don't need anymore */
close(nl);
out:
if (rst >= 0) {
if (restore_ns(rst, &net_ns_desc) < 0)
err = -1;
} else if (pid != 0) {
ns->net.nlsk = -1;
if (ns->pid == getpid()) {
/*
* If netns isn't dumped, criu will fail only
* if an unsupported socket will be really dumped.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment