Commit cab3f176 authored by Pavel Emelyanov

soccr/tcp: Restore queues using library

Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 826a86e4
......@@ -281,10 +281,9 @@ int dump_one_tcp(int fd, struct inet_sk_desc *sk)
return 0;
}
static int __send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
static int send_tcp_queue(struct libsoccr_sk *sk, struct libsoccr_sk_data *data,
int queue, u32 len, struct cr_img *img)
{
int ret, err = -1, max_chunk;
int off;
char *buf;
buf = xmalloc(len);
......@@ -294,91 +293,27 @@ static int __send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
if (read_img_buf(img, buf, len) < 0)
goto err;
max_chunk = len;
off = 0;
do {
int chunk = len;
if (chunk > max_chunk)
chunk = max_chunk;
ret = send(sk, buf + off, chunk, 0);
if (ret <= 0) {
if (max_chunk > 1024) {
/*
* Kernel not only refuses the whole chunk,
* but refuses to split it into pieces too.
*
* When restoring recv queue in repair mode
* kernel doesn't try hard and just allocates
* a linear skb with the size we pass to the
* system call. Thus, if the size is too big
* for slab allocator, the send just fails
* with ENOMEM.
*
* In any case -- try smaller chunk, hopefully
* there's still enough memory in the system.
*/
max_chunk >>= 1;
continue;
}
pr_perror("Can't restore %d queue data (%d), want (%d:%d:%d)",
queue, ret, chunk, len, max_chunk);
goto err;
}
off += ret;
len -= ret;
} while (len);
if (libsoccr_set_queue_bytes(sk, data, sizeof(*data), queue, buf))
goto err;
err = 0;
err:
xfree(buf);
return 0;
return err;
}
static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
{
pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
pr_perror("Can't set repair queue");
return -1;
}
return __send_tcp_queue(sk, queue, len, img);
err:
xfree(buf);
return -1;
}
static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
static int restore_tcp_queues(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, struct cr_img *img)
{
u32 len;
len = tse->inq_len;
if (len && send_tcp_queue(sk, TCP_RECV_QUEUE, len, img))
return -1;
/*
* All data in a write buffer can be divided on two parts sent
* but not yet acknowledged data and unsent data.
* The TCP stack must know which data have been sent, because
* acknowledgment can be received for them. These data must be
* restored in repair mode.
*/
len = tse->outq_len - tse->unsq_len;
if (len && send_tcp_queue(sk, TCP_SEND_QUEUE, len, img))
len = data->inq_len;
if (len && send_tcp_queue(sk, data, TCP_RECV_QUEUE, len, img))
return -1;
/*
* The second part of data have never been sent to outside, so
* they can be restored without any tricks.
*/
len = tse->unsq_len;
tcp_repair_off(sk);
if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img))
return -1;
if (tcp_repair_on(sk))
len = data->outq_len;
if (len && send_tcp_queue(sk, data, TCP_SEND_QUEUE, len, img))
return -1;
return 0;
......@@ -456,7 +391,7 @@ static int restore_tcp_conn_state(int sk, struct libsoccr_sk *socr, struct inet_
if (restore_prepare_socket(sk))
goto err_c;
if (restore_tcp_queues(sk, tse, img))
if (restore_tcp_queues(socr, &data, img))
goto err_c;
if (libsoccr_set_sk_data(socr, &data, sizeof(data)))
......
......@@ -428,3 +428,105 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
return 0;
}
/*
 * Feed @len bytes starting at @buf into the socket behind @sk using
 * plain send() calls, shrinking the chunk size when a call is refused.
 *
 * This helper does not select a repair queue itself; @queue is only
 * used to label the error message.
 *
 * Returns 0 once every byte has been accepted (which is trivially the
 * case for an empty buffer) and -1 when send() still fails after the
 * chunk limit has dropped to 1024 bytes or below.
 */
static int __send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
{
	int ret, max_chunk;
	int off = 0;

	max_chunk = len;

	/*
	 * Pre-checked loop: with nothing to send we must not call send()
	 * at all, since a call that transfers 0 bytes is treated as a
	 * failure by the check below.
	 */
	while (len) {
		int chunk = len;

		if (chunk > max_chunk)
			chunk = max_chunk;

		ret = send(sk->fd, buf + off, chunk, 0);
		if (ret <= 0) {
			if (max_chunk > 1024) {
				/*
				 * Kernel not only refuses the whole chunk,
				 * but refuses to split it into pieces too.
				 *
				 * When restoring recv queue in repair mode
				 * kernel doesn't try hard and just allocates
				 * a linear skb with the size we pass to the
				 * system call. Thus, if the size is too big
				 * for slab allocator, the send just fails
				 * with ENOMEM.
				 *
				 * In any case -- try smaller chunk, hopefully
				 * there's still enough memory in the system.
				 */
				max_chunk >>= 1;
				continue;
			}

			loge("Can't restore %d queue data (%d), want (%d:%d:%d)",
					queue, ret, chunk, len, max_chunk);
			return -1;
		}

		/* Partial sends are fine: advance past what was accepted. */
		off += ret;
		len -= ret;
	}

	return 0;
}
/*
 * Select @queue as the socket's repair queue via TCP_REPAIR_QUEUE and
 * then push @len bytes from @buf into it.
 *
 * Returns -1 if the queue cannot be selected, otherwise whatever
 * __send_queue() returns.
 */
static int send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
{
	int rc;

	logd("\tRestoring TCP %d queue data %u bytes\n", queue, len);

	rc = setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue));
	if (rc >= 0)
		return __send_queue(sk, queue, buf, len);

	loge("Can't set repair queue");
	return -1;
}
/*
 * Refill one of the socket's queues from @buf.
 *
 * @sk:        socket to restore into
 * @data:      descriptor supplying the queue lengths (inq_len for the
 *             receive queue; outq_len and unsq_len for the send queue)
 * @data_size: size of @data as known to the caller; must be at least
 *             SOCR_DATA_MIN_SIZE
 * @queue:     TCP_RECV_QUEUE or TCP_SEND_QUEUE
 * @buf:       queue contents; for the send queue the sent-but-unacked
 *             bytes come first, immediately followed by the unsent ones
 *
 * Returns 0 on success (including when the selected queue is empty)
 * and a negative value on failure:
 *   -1  @data is NULL, @data_size is too small, unsq_len exceeds
 *       outq_len, or restoring the receive queue failed
 *   -2  the sent-but-unacked part of the send queue was not restored
 *   -3  the unsent part of the send queue was not restored
 *   -4  repair mode could not be switched back on
 *   -5  @queue is neither of the two known queues
 */
int libsoccr_set_queue_bytes(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size,
		int queue, char *buf)
{
	if (!data || data_size < SOCR_DATA_MIN_SIZE)
		return -1;

	if (queue == TCP_RECV_QUEUE) {
		/* An empty queue needs no work, same as the send-queue parts below. */
		if (!data->inq_len)
			return 0;

		return send_queue(sk, TCP_RECV_QUEUE, buf, data->inq_len);
	}

	if (queue == TCP_SEND_QUEUE) {
		__u32 len, ulen;

		/*
		 * All data in the write buffer falls into two parts: data
		 * that was sent but not yet acknowledged, and data that was
		 * never sent. The TCP stack must know which data have been
		 * sent, because acknowledgments can arrive for them, so that
		 * part must be restored in repair mode.
		 */
		ulen = data->unsq_len;

		/*
		 * The unsent part is a sub-range of the whole queue. Reject
		 * inconsistent lengths instead of letting the subtraction
		 * wrap around and run far past the end of @buf.
		 */
		if (ulen > data->outq_len)
			return -1;

		len = data->outq_len - ulen;
		if (len && send_queue(sk, TCP_SEND_QUEUE, buf, len))
			return -2;

		if (ulen) {
			/*
			 * The second part of data have never been sent to outside, so
			 * they can be restored without any tricks.
			 */
			tcp_repair_off(sk->fd);
			if (__send_queue(sk, TCP_SEND_QUEUE, buf + len, ulen)) {
				/*
				 * Best effort: don't hand the socket back in
				 * a different mode than we received it in.
				 */
				(void)tcp_repair_on(sk->fd);
				return -3;
			}
			if (tcp_repair_on(sk->fd))
				return -4;
		}

		return 0;
	}

	return -5;
}
......@@ -56,4 +56,6 @@ char *libsoccr_get_queue_bytes(struct libsoccr_sk *sk, int queue_id, int steal);
int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size);
int libsoccr_set_sk_data_noq(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size);
int libsoccr_set_sk_data(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size);
/*
 * Restore the contents of one socket queue from buf.
 *
 * queue selects the target: TCP_RECV_QUEUE or TCP_SEND_QUEUE; any other
 * value is rejected. The number of bytes taken from buf comes from the
 * length fields of data (inq_len for the receive queue, outq_len and
 * unsq_len for the send queue). data must be non-NULL and data_size
 * must be at least SOCR_DATA_MIN_SIZE.
 *
 * Returns 0 on success and a negative value on failure.
 */
int libsoccr_set_queue_bytes(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size,
int queue, char *buf);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment