Commit 6aa4a9c2 authored by Andrei Vagin's avatar Andrei Vagin

netns: restore internal veth devices

When we dump a veth device, the kernel reports where a peer device lives
and we use this information to restore this veth pair.

On restore we set a net ns id for a peer and it is created in the required
netns.

v2: add more comments
Signed-off-by: 's avatarAndrei Vagin <avagin@virtuozzo.com>
parent 7dc4b34e
...@@ -77,6 +77,12 @@ struct netns_id { ...@@ -77,6 +77,12 @@ struct netns_id {
struct list_head node; struct list_head node;
}; };
struct net_link {
unsigned int ifindex;
bool created;
struct list_head node;
};
struct ns_id { struct ns_id {
unsigned int kid; unsigned int kid;
unsigned int id; unsigned int id;
...@@ -120,6 +126,7 @@ struct ns_id { ...@@ -120,6 +126,7 @@ struct ns_id {
int nlsk; /* for sockets collection */ int nlsk; /* for sockets collection */
int seqsk; /* to talk to parasite daemons */ int seqsk; /* to talk to parasite daemons */
struct list_head ids; struct list_head ids;
struct list_head links;
} net; } net;
}; };
}; };
......
...@@ -300,8 +300,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, ...@@ -300,8 +300,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
nsid_add(nsid, nd, id, pid); nsid_add(nsid, nd, id, pid);
nsid->ns_populated = false; nsid->ns_populated = false;
if (nd == &net_ns_desc) if (nd == &net_ns_desc) {
INIT_LIST_HEAD(&nsid->net.ids); INIT_LIST_HEAD(&nsid->net.ids);
INIT_LIST_HEAD(&nsid->net.links);
}
} }
return nsid; return nsid;
...@@ -425,6 +427,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd ...@@ -425,6 +427,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
if (nd == &net_ns_desc) { if (nd == &net_ns_desc) {
INIT_LIST_HEAD(&nsid->net.ids); INIT_LIST_HEAD(&nsid->net.ids);
INIT_LIST_HEAD(&nsid->net.links);
} }
found: found:
......
...@@ -390,12 +390,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr * ...@@ -390,12 +390,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV); return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
} }
static int lookup_net_by_netid(struct ns_id *ns, int net_id)
{
struct netns_id *p;
list_for_each_entry(p, &ns->net.ids, node)
if (p->netnsid_value == net_id)
return p->target_ns_id;
return -1;
}
static int dump_one_netdev(int type, struct ifinfomsg *ifi, static int dump_one_netdev(int type, struct ifinfomsg *ifi,
struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds, struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info)) int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
{ {
int ret = -1; int ret = -1, i, peer_ifindex;
int i;
NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT; NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
SysctlEntry *confs4 = NULL; SysctlEntry *confs4 = NULL;
int size4 = ARRAY_SIZE(devconfs4); int size4 = ARRAY_SIZE(devconfs4);
...@@ -415,6 +425,39 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi, ...@@ -415,6 +425,39 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
netdev.flags = ifi->ifi_flags; netdev.flags = ifi->ifi_flags;
netdev.name = RTA_DATA(tb[IFLA_IFNAME]); netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
if (kdat.has_nsid) {
s32 nsid = -1;
peer_ifindex = ifi->ifi_index;
if (tb[IFLA_LINK])
peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
netdev.has_peer_ifindex = true;
netdev.peer_ifindex = peer_ifindex;
if (tb[IFLA_LINK_NETNSID])
nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
pr_debug("The peer link is in the %d netns with the %u index\n",
nsid, netdev.peer_ifindex);
if (nsid == -1)
nsid = ns->id;
else
nsid = lookup_net_by_netid(ns, nsid);
if (nsid < 0) {
pr_warn("The %s veth is in an external netns\n",
netdev.name);
} else {
netdev.has_peer_nsid = true;
netdev.peer_nsid = nsid;
}
}
/*
* If kdat.has_nsid is false, a multiple network namespaces are not dumped,
* so if we are here, this means only one netns is dumped.
*/
if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) { if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
netdev.has_address = true; netdev.has_address = true;
netdev.address.data = nla_data(tb[IFLA_ADDRESS]); netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
...@@ -1176,9 +1219,11 @@ enum { ...@@ -1176,9 +1219,11 @@ enum {
#define IFLA_NET_NS_FD 28 #define IFLA_NET_NS_FD 28
#endif #endif
static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req) static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
struct ns_id *ns, int ns_fd)
{ {
char key[100], *val; char key[100], *val;
struct ns_id *peer_ns = NULL;
snprintf(key, sizeof(key), "veth[%s]", nde->name); snprintf(key, sizeof(key), "veth[%s]", nde->name);
val = external_lookup_by_key(key); val = external_lookup_by_key(key);
...@@ -1187,7 +1232,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req) ...@@ -1187,7 +1232,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
aux = strchrnul(val, '@'); aux = strchrnul(val, '@');
addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val); addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
return 0;
} }
if (nde->has_peer_nsid) {
if (ns && nde->peer_nsid == ns->id) {
struct net_link *link;
list_for_each_entry(link, &ns->net.links, node)
if (link->ifindex == nde->peer_ifindex && link->created) {
pr_err("%d\n", nde->peer_ifindex);
req->h.nlmsg_type = RTM_SETLINK;
return 0;
}
}
peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
if (peer_ns->ns_populated) {
req->h.nlmsg_type = RTM_SETLINK;
return 0;
}
}
if (peer_ns) {
if (ns && nde->peer_nsid == ns->id) {
struct net_link *link;
link = xmalloc(sizeof(*link));
if (link == NULL)
return -1;
link->ifindex = nde->ifindex;
link->created = true;
list_add(&link->node, &ns->net.links);
}
addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
return 0;
}
pr_err("Unknown peer net namespace");
return -1;
} }
static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req) static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
...@@ -1196,17 +1281,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_ ...@@ -1196,17 +1281,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
struct rtattr *veth_data, *peer_data; struct rtattr *veth_data, *peer_data;
struct ifinfomsg ifm; struct ifinfomsg ifm;
BUG_ON(ns_fd < 0);
addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4); addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
veth_data = NLMSG_TAIL(&req->h); veth_data = NLMSG_TAIL(&req->h);
addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0); addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
peer_data = NLMSG_TAIL(&req->h); peer_data = NLMSG_TAIL(&req->h);
memset(&ifm, 0, sizeof(ifm)); memset(&ifm, 0, sizeof(ifm));
ifm.ifi_index = nde->peer_ifindex;
addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm)); addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
veth_peer_info(nde, req);
addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd)); veth_peer_info(nde, req, ns, ns_fd);
peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data; peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data; veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
...@@ -1502,7 +1587,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns) ...@@ -1502,7 +1587,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns)
ret = restore_link(ns, nde, nlsk, criu_nlsk); ret = restore_link(ns, nde, nlsk, criu_nlsk);
if (ret) { if (ret) {
pr_err("Can't restore link\n"); pr_err("Can't restore link: %d\n", ret);
goto exit; goto exit;
} }
......
...@@ -13,6 +13,7 @@ RUN apt-get update && apt-get install -y \ ...@@ -13,6 +13,7 @@ RUN apt-get update && apt-get install -y \
libcap-dev \ libcap-dev \
iptables \ iptables \
libnl-3-dev \ libnl-3-dev \
libnl-route-3-dev \
libselinux-dev \ libselinux-dev \
pkg-config \ pkg-config \
git-core \ git-core \
......
...@@ -4,7 +4,7 @@ set -x -e ...@@ -4,7 +4,7 @@ set -x -e
TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
libnl-3-dev gcc-multilib gdb bash python-protobuf libnl-3-dev gcc-multilib gdb bash python-protobuf
libnet-dev util-linux asciidoc xmlto" libnet-dev util-linux asciidoc xmlto libnl-route-3-dev"
travis_prep () { travis_prep () {
[ -n "$SKIP_TRAVIS_PREP" ] && return [ -n "$SKIP_TRAVIS_PREP" ] && return
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment