Commit 1b4e9058 authored by Saied Kazemi, committed by Pavel Emelyanov

Do not call listen() when SO_REUSEADDR is off

For an established TCP connection, the send queue is restored in two
steps: in step (1), we retransmit the data that was sent before but not
yet acknowledged, and in step (2), we transmit the data that was never
sent outside before.  The TCP_REPAIR option is disabled before step (2)
and re-enabled after step (2) (without this patch).

If the amount of data to be sent in step (2) is large, the TCP_REPAIR
flag on the socket can remain off for some time (O(milliseconds)).  If a
listen() is called on another socket bound to the same port during this
time window, it fails, because turning TCP_REPAIR off clears
the SO_REUSEADDR flag on the socket.

This patch adds a mutex (reuseaddr_lock) per port number, so that a
listen() on a port number does not happen while SO_REUSEADDR for another
socket on the same port is off.

Thanks to Amey Deshpande <ameyd@google.com> for debugging.
Signed-off-by: Saied Kazemi <saied@google.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 603929ca
...@@ -79,5 +79,6 @@ extern int restore_one_tcp(int sk, struct inet_sk_info *si); ...@@ -79,5 +79,6 @@ extern int restore_one_tcp(int sk, struct inet_sk_info *si);
extern int check_tcp(void); extern int check_tcp(void);
extern int rst_tcp_socks_add(int fd, bool reuseaddr); extern int rst_tcp_socks_add(int fd, bool reuseaddr);
extern mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii);
#endif /* __CR_SK_INET_H__ */ #endif /* __CR_SK_INET_H__ */
...@@ -30,6 +30,7 @@ struct inet_port { ...@@ -30,6 +30,7 @@ struct inet_port {
int port; int port;
int type; int type;
futex_t users; futex_t users;
mutex_t reuseaddr_lock;
struct list_head list; struct list_head list;
}; };
...@@ -53,6 +54,7 @@ static struct inet_port *port_add(int type, int port) ...@@ -53,6 +54,7 @@ static struct inet_port *port_add(int type, int port)
e->type = type; e->type = type;
futex_init(&e->users); futex_init(&e->users);
futex_inc(&e->users); futex_inc(&e->users);
mutex_init(&e->reuseaddr_lock);
list_add(&e->list, &inet_ports); list_add(&e->list, &inet_ports);
...@@ -537,10 +539,13 @@ static int open_inet_sk(struct file_desc *d) ...@@ -537,10 +539,13 @@ static int open_inet_sk(struct file_desc *d)
goto err; goto err;
} }
mutex_lock(&ii->port->reuseaddr_lock);
if (listen(sk, ie->backlog) == -1) { if (listen(sk, ie->backlog) == -1) {
pr_perror("Can't listen on a socket"); pr_perror("Can't listen on a socket");
mutex_unlock(&ii->port->reuseaddr_lock);
goto err; goto err;
} }
mutex_unlock(&ii->port->reuseaddr_lock);
} }
if (ie->state == TCP_ESTABLISHED && if (ie->state == TCP_ESTABLISHED &&
...@@ -624,3 +629,8 @@ int inet_connect(int sk, struct inet_sk_info *ii) ...@@ -624,3 +629,8 @@ int inet_connect(int sk, struct inet_sk_info *ii)
return 0; return 0;
} }
mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii)
{
return &ii->port->reuseaddr_lock;
}
...@@ -507,7 +507,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img) ...@@ -507,7 +507,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
return __send_tcp_queue(sk, queue, len, img); return __send_tcp_queue(sk, queue, len, img);
} }
static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img) static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img, mutex_t *reuse_lock)
{ {
u32 len; u32 len;
...@@ -534,11 +534,17 @@ static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img) ...@@ -534,11 +534,17 @@ static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
* they can be restored without any tricks. * they can be restored without any tricks.
*/ */
len = tse->unsq_len; len = tse->unsq_len;
mutex_lock(reuse_lock);
tcp_repair_off(sk); tcp_repair_off(sk);
if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img)) if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img)) {
mutex_unlock(reuse_lock);
return -1; return -1;
if (tcp_repair_on(sk)) }
if (tcp_repair_on(sk)) {
mutex_unlock(reuse_lock);
return -1; return -1;
}
mutex_unlock(reuse_lock);
return 0; return 0;
} }
...@@ -621,7 +627,7 @@ static int restore_tcp_conn_state(int sk, struct inet_sk_info *ii) ...@@ -621,7 +627,7 @@ static int restore_tcp_conn_state(int sk, struct inet_sk_info *ii)
if (restore_tcp_opts(sk, tse)) if (restore_tcp_opts(sk, tse))
goto err_c; goto err_c;
if (restore_tcp_queues(sk, tse, img)) if (restore_tcp_queues(sk, tse, img, inet_get_reuseaddr_lock(ii)))
goto err_c; goto err_c;
if (tse->has_nodelay && tse->nodelay) { if (tse->has_nodelay && tse->nodelay) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment