Commit 98efb3c9 authored by Andrey Vagin, committed by Pavel Emelyanov

tcp: restore the boundary between sent and unsent data

All data in a write buffer can be divided into two parts: data that has
been sent but not yet acknowledged, and unsent data.

Currently the boundary between sent and unsent data is not dumped and
all the data are restored as if they have already been sent.
This method can provoke long delays in a TCP connection, because the
kernel can wait before retransmitting data.
https://bugzilla.openvz.org/show_bug.cgi?id=2808

The TCP stack must know which data have been sent, because
acknowledgment can be received for them. These data must be restored in
repair mode.

The second part of the data has never been sent out, so it can be
restored without any tricks. This data can be sent into the socket as
usual.

To restore the unsent data, repair mode is disabled for the socket and
then enabled again once the data has been restored. It will be disabled
again after the network is unlocked. In that case a window probe is
sent, which is required for waking up the connection.

This patch fixes long delays in tcp connections after dumping and
restoring.

Thanks Pavel for the idea of disabling repair mode for restoring
unsent data.

https://bugzilla.openvz.org/show_bug.cgi?id=2808

Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent d917ff15
...@@ -452,19 +452,12 @@ static int restore_tcp_seqs(int sk, TcpStreamEntry *tse) ...@@ -452,19 +452,12 @@ static int restore_tcp_seqs(int sk, TcpStreamEntry *tse)
return 0; return 0;
} }
static int send_tcp_queue(int sk, int queue, u32 len, int imgfd) static int __send_tcp_queue(int sk, int queue, u32 len, int imgfd)
{ {
int ret, err = -1; int ret, err = -1;
int off, max; int off, max;
char *buf; char *buf;
pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
pr_perror("Can't set repair queue");
return -1;
}
buf = xmalloc(len); buf = xmalloc(len);
if (!buf) if (!buf)
return -1; return -1;
...@@ -494,16 +487,49 @@ err: ...@@ -494,16 +487,49 @@ err:
return err; return err;
} }
/*
 * Restore len bytes into one of the socket's TCP repair queues.
 *
 * Logs the queue id and length, selects that queue on sk with the
 * TCP_REPAIR_QUEUE socket option, then hands the transfer over to
 * __send_tcp_queue() with the same arguments.
 *
 * Returns -1 if the queue cannot be selected; otherwise returns
 * whatever __send_tcp_queue() returns.
 */
static int send_tcp_queue(int sk, int queue, u32 len, int imgfd)
{
pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
/* Select the target queue first; give up before moving any data if that fails. */
if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
pr_perror("Can't set repair queue");
return -1;
}
/*
 * NOTE(review): presumably the payload is read from imgfd; the helper's
 * body is not fully visible here, so confirm against its definition.
 */
return __send_tcp_queue(sk, queue, len, imgfd);
}
static int restore_tcp_queues(int sk, TcpStreamEntry *tse, int fd) static int restore_tcp_queues(int sk, TcpStreamEntry *tse, int fd)
{ {
u32 len;
if (restore_prepare_socket(sk)) if (restore_prepare_socket(sk))
return -1; return -1;
if (tse->inq_len && len = tse->inq_len;
send_tcp_queue(sk, TCP_RECV_QUEUE, tse->inq_len, fd)) if (len && send_tcp_queue(sk, TCP_RECV_QUEUE, len, fd))
return -1;
/*
* All data in a write buffer can be divided on two parts sent
* but not yet acknowledged data and unsent data.
* The TCP stack must know which data have been sent, because
* acknowledgment can be received for them. These data must be
* restored in repair mode.
*/
len = tse->outq_len - tse->unsq_len;
if (len && send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
return -1;
/*
* The second part of data have never been sent to outside, so
* they can be restored without any tricks.
*/
len = tse->unsq_len;
tcp_repair_off(sk);
if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, fd))
return -1; return -1;
if (tse->outq_len && if (tcp_repair_on(sk))
send_tcp_queue(sk, TCP_SEND_QUEUE, tse->outq_len, fd))
return -1; return -1;
return 0; return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment