Commit a74605a7, authored by Andrey Vagin, committed by Pavel Emelyanov

sk-inet: restore option REUSEADDR (v2)

All sockets are created with SO_REUSEADDR set, because it is needed for restoring.
E.g., a listening socket may be created after a connected socket, and both of them
are bound to the same port.

So the original value of SO_REUSEADDR should be restored only after all sockets on a port have been created.

This code creates a structure for each port of each socket type
and counts the number of sockets on it which are not restored yet.

Sockets have a post_open() hook, in which a socket waits until all sockets for
its port have been created and then restores SO_REUSEADDR.

struct inet_port contains a type (udp, tcp, etc.) and a port number.
It doesn't contain the family or the address, because that would add a lot of
extra logic without bringing a significant benefit.

v2: fixes according to comments from Pavel
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 61490ba9
...@@ -22,9 +22,11 @@ struct inet_sk_desc { ...@@ -22,9 +22,11 @@ struct inet_sk_desc {
struct list_head rlist; struct list_head rlist;
}; };
struct inet_port;
struct inet_sk_info { struct inet_sk_info {
InetSkEntry *ie; InetSkEntry *ie;
struct file_desc d; struct file_desc d;
struct inet_port *port;
struct list_head rlist; struct list_head rlist;
}; };
......
...@@ -6,4 +6,5 @@ message sk_opts_entry { ...@@ -6,4 +6,5 @@ message sk_opts_entry {
required uint64 so_snd_tmo_usec = 4; required uint64 so_snd_tmo_usec = 4;
required uint64 so_rcv_tmo_sec = 5; required uint64 so_rcv_tmo_sec = 5;
required uint64 so_rcv_tmo_usec = 6; required uint64 so_rcv_tmo_usec = 6;
optional bool reuseaddr = 7;
} }
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <sys/socket.h> #include <sys/socket.h>
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <sys/mman.h>
#include <unistd.h> #include <unistd.h>
#include <netinet/tcp.h> #include <netinet/tcp.h>
#include <arpa/inet.h> #include <arpa/inet.h>
...@@ -22,6 +23,41 @@ ...@@ -22,6 +23,41 @@
#define PB_ALEN_INET 1 #define PB_ALEN_INET 1
#define PB_ALEN_INET6 4 #define PB_ALEN_INET6 4
/* List of all known (type, port) pairs; port_add() searches and extends it. */
static LIST_HEAD(inet_ports);
/*
 * Per-(socket type, port) accounting entry.
 *
 * It tracks how many sockets using the pair still have to be opened, so
 * that post_open_inet_sk() can wait for all of them before it clears
 * SO_REUSEADDR on a socket.
 */
struct inet_port {
/* Port number; port_add() is called with the socket's src_port. */
int port;
/* Socket type; port_add() is called with the socket's type. */
int type;
/*
 * Incremented by port_add() for every collected socket, decremented
 * in open_inet_sk(); post_open_inet_sk() waits until it reaches 0.
 */
futex_t users;
/* Link in the inet_ports list. */
struct list_head list;
};
static struct inet_port *port_add(int type, int port)
{
struct inet_port *e;
list_for_each_entry(e, &inet_ports, list)
if (e->type == type && e->port == port) {
futex_inc(&e->users);
return e;
}
e = shmalloc(sizeof(*e));
if (e == NULL) {
pr_err("Not enough memory\n");
return NULL;
}
e->port = port;
e->type = type;
futex_init(&e->users);
futex_inc(&e->users);
list_add(&e->list, &inet_ports);
return e;
}
static void show_one_inet(const char *act, const struct inet_sk_desc *sk) static void show_one_inet(const char *act, const struct inet_sk_desc *sk)
{ {
char src_addr[INET_ADDR_LEN] = "<unknown>"; char src_addr[INET_ADDR_LEN] = "<unknown>";
...@@ -316,10 +352,12 @@ static bool is_bound(struct inet_sk_info *ii) ...@@ -316,10 +352,12 @@ static bool is_bound(struct inet_sk_info *ii)
static int open_inet_sk(struct file_desc *d); static int open_inet_sk(struct file_desc *d);
static int post_open_inet_sk(struct file_desc *d, int sk);
static struct file_desc_ops inet_desc_ops = { static struct file_desc_ops inet_desc_ops = {
.type = FD_TYPES__INETSK, .type = FD_TYPES__INETSK,
.open = open_inet_sk, .open = open_inet_sk,
.post_open = post_open_inet_sk,
}; };
static int collect_one_inetsk(void *o, ProtobufCMessage *base) static int collect_one_inetsk(void *o, ProtobufCMessage *base)
...@@ -331,6 +369,15 @@ static int collect_one_inetsk(void *o, ProtobufCMessage *base) ...@@ -331,6 +369,15 @@ static int collect_one_inetsk(void *o, ProtobufCMessage *base)
if (tcp_connection(ii->ie)) if (tcp_connection(ii->ie))
tcp_locked_conn_add(ii); tcp_locked_conn_add(ii);
/*
* A socket can reuse addr only if all previous sockets allow that,
* so a value of SO_REUSEADDR can be restored after restoring all
* sockets.
*/
ii->port = port_add(ii->ie->type, ii->ie->src_port);
if (ii->port == NULL)
return -1;
return 0; return 0;
} }
...@@ -360,10 +407,27 @@ static int inet_validate_address(InetSkEntry *ie) ...@@ -360,10 +407,27 @@ static int inet_validate_address(InetSkEntry *ie)
return -1; return -1;
} }
/*
 * post_open hook for inet sockets: restore the dumped SO_REUSEADDR value.
 *
 * open_inet_sk() sets SO_REUSEADDR on every socket it creates, so there is
 * nothing to do when the image says the option was on. Otherwise wait
 * until the users counter of the socket's port drops to zero and only
 * then switch the option off on @sk.
 *
 * Returns 0 on success, -1 if the option could not be restored.
 */
static int post_open_inet_sk(struct file_desc *d, int sk)
{
	struct inet_sk_info *info = container_of(d, struct inet_sk_info, d);
	int off = 0;

	/* The option is already set, which matches the image. */
	if (info->ie->opts->reuseaddr)
		return 0;

	/* Do not clear the option before every socket on this port is open. */
	futex_wait_until(&info->port->users, 0);

	if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &off))
		return -1;

	return 0;
}
static int open_inet_sk(struct file_desc *d) static int open_inet_sk(struct file_desc *d)
{ {
struct inet_sk_info *ii; struct inet_sk_info *ii;
int sk; int sk, yes = 1;
ii = container_of(d, struct inet_sk_info, d); ii = container_of(d, struct inet_sk_info, d);
...@@ -389,12 +453,17 @@ static int open_inet_sk(struct file_desc *d) ...@@ -389,12 +453,17 @@ static int open_inet_sk(struct file_desc *d)
} }
if (ii->ie->v6only) { if (ii->ie->v6only) {
int yes = 1;
if (restore_opt(sk, SOL_IPV6, IPV6_V6ONLY, &yes) == -1) if (restore_opt(sk, SOL_IPV6, IPV6_V6ONLY, &yes) == -1)
return -1; return -1;
} }
/*
* Set SO_REUSEADDR, because some sockets can be bound to one addr.
* The origin value of SO_REUSEADDR will be restored in post_open.
*/
if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &yes))
return -1;
if (tcp_connection(ii->ie)) { if (tcp_connection(ii->ie)) {
if (!opts.tcp_established_ok) { if (!opts.tcp_established_ok) {
pr_err("Connected TCP socket in image\n"); pr_err("Connected TCP socket in image\n");
...@@ -433,6 +502,8 @@ static int open_inet_sk(struct file_desc *d) ...@@ -433,6 +502,8 @@ static int open_inet_sk(struct file_desc *d)
inet_connect(sk, ii)) inet_connect(sk, ii))
goto err; goto err;
done: done:
futex_dec(&ii->port->users);
if (rst_file_params(sk, ii->ie->fown, ii->ie->flags)) if (rst_file_params(sk, ii->ie->fown, ii->ie->flags))
goto err; goto err;
......
...@@ -86,6 +86,8 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) ...@@ -86,6 +86,8 @@ int restore_socket_opts(int sk, SkOptsEntry *soe)
tv.tv_usec = soe->so_rcv_tmo_usec; tv.tv_usec = soe->so_rcv_tmo_usec;
ret |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv); ret |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv);
/* The restore of SO_REUSEADDR depends on type of socket */
return ret; return ret;
} }
...@@ -109,7 +111,7 @@ int do_dump_opt(int sk, int level, int name, void *val, int len) ...@@ -109,7 +111,7 @@ int do_dump_opt(int sk, int level, int name, void *val, int len)
int dump_socket_opts(int sk, SkOptsEntry *soe) int dump_socket_opts(int sk, SkOptsEntry *soe)
{ {
int ret = 0; int ret = 0, val;
struct timeval tv; struct timeval tv;
ret |= dump_opt(sk, SOL_SOCKET, SO_SNDBUF, &soe->so_sndbuf); ret |= dump_opt(sk, SOL_SOCKET, SO_SNDBUF, &soe->so_sndbuf);
...@@ -123,6 +125,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe) ...@@ -123,6 +125,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe)
soe->so_rcv_tmo_sec = tv.tv_sec; soe->so_rcv_tmo_sec = tv.tv_sec;
soe->so_rcv_tmo_usec = tv.tv_usec; soe->so_rcv_tmo_usec = tv.tv_usec;
ret |= dump_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val);
soe->reuseaddr = val ? true : false;
soe->has_reuseaddr = true;
return ret; return ret;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment