Commit a20ed3c6 authored by Pavel Emelyanov

page-server: Fine grained corking control (v3)

When live migrating a container with a large number of processes
inside, a page-server-ed dump may take up to 10 times longer
than a local dump.

The delay is always introduced in open_page_server_xfer()
when criu negotiates the has_parent bit on the 2nd task. This
likely happens because of Nagle's algorithm -- after the
write() of the OPEN2 command, the kernel delays sending this
command while waiting for more data.

v2:
Fix this by turning on the CORK option on memory transfer sockets
on the send side, and setting NODELAY whenever urgent data is sent.
The receive side is always NODELAY-ed. According to Alexey Kuznetsov
this is the best mode ever for such type of transfers.

v3:
Push packets in pre-dump's check_parent_server_xfer too.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Andrew Vagin <avagin@odin.com>
parent 73cb87f9
...@@ -266,5 +266,8 @@ int fd_has_data(int lfd); ...@@ -266,5 +266,8 @@ int fd_has_data(int lfd);
int make_yard(char *path); int make_yard(char *path);
void tcp_nodelay(int sk, bool on);
void tcp_cork(int sk, bool on);
const char *ns_to_string(unsigned int ns); const char *ns_to_string(unsigned int ns);
#endif /* __CR_UTIL_H__ */ #endif /* __CR_UTIL_H__ */
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "image.h" #include "image.h"
#include "page-xfer.h" #include "page-xfer.h"
#include "page-pipe.h" #include "page-pipe.h"
#include "util.h"
#include "protobuf.h" #include "protobuf.h"
#include "protobuf/pagemap.pb-c.h" #include "protobuf/pagemap.pb-c.h"
...@@ -183,6 +183,13 @@ static int page_server_serve(int sk) ...@@ -183,6 +183,13 @@ static int page_server_serve(int sk)
int ret = -1; int ret = -1;
bool flushed = false; bool flushed = false;
/*
* This socket only accepts data except one thing -- it
* writes back the has_parent bit from time to time, so
* make it NODELAY all the time.
*/
tcp_nodelay(sk, true);
if (pipe(cxfer.p)) { if (pipe(cxfer.p)) {
pr_perror("Can't make pipe for xfer"); pr_perror("Can't make pipe for xfer");
close(sk); close(sk);
...@@ -400,7 +407,7 @@ int connect_to_page_server(void) ...@@ -400,7 +407,7 @@ int connect_to_page_server(void)
if (opts.ps_socket != -1) { if (opts.ps_socket != -1) {
page_server_sk = opts.ps_socket; page_server_sk = opts.ps_socket;
pr_info("Re-using ps socket %d\n", page_server_sk); pr_info("Re-using ps socket %d\n", page_server_sk);
return 0; goto out;
} }
pr_info("Connecting to server %s:%u\n", pr_info("Connecting to server %s:%u\n",
...@@ -420,6 +427,13 @@ int connect_to_page_server(void) ...@@ -420,6 +427,13 @@ int connect_to_page_server(void)
return -1; return -1;
} }
out:
/*
* CORK the socket at the very beginning. As per ANK
* the corked by default socket with sporadic NODELAY-s
* on urgent data is the smartest mode ever.
*/
tcp_cork(page_server_sk, true);
return 0; return 0;
} }
...@@ -538,6 +552,9 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id) ...@@ -538,6 +552,9 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
return -1; return -1;
} }
/* Push the command NOW */
tcp_nodelay(xfer->sk, true);
if (read(xfer->sk, &has_parent, 1) != 1) { if (read(xfer->sk, &has_parent, 1) != 1) {
pr_perror("The page server doesn't answer"); pr_perror("The page server doesn't answer");
return -1; return -1;
...@@ -840,6 +857,8 @@ static int check_parent_server_xfer(int fd_type, long id) ...@@ -840,6 +857,8 @@ static int check_parent_server_xfer(int fd_type, long id)
return -1; return -1;
} }
tcp_nodelay(page_server_sk, true);
if (read(page_server_sk, &has_parent, sizeof(int)) != sizeof(int)) { if (read(page_server_sk, &has_parent, sizeof(int)) != sizeof(int)) {
pr_perror("The page server doesn't answer"); pr_perror("The page server doesn't answer");
return -1; return -1;
......
...@@ -28,6 +28,9 @@ ...@@ -28,6 +28,9 @@
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include "compiler.h" #include "compiler.h"
#include "asm/types.h" #include "asm/types.h"
...@@ -880,3 +883,15 @@ const char *ns_to_string(unsigned int ns) ...@@ -880,3 +883,15 @@ const char *ns_to_string(unsigned int ns)
return NULL; return NULL;
} }
} }
/*
 * Switch the TCP_CORK option of socket @sk on (@on is true) or
 * off (@on is false).
 *
 * The setsockopt() result is deliberately discarded, so the caller
 * is not told when the option could not be changed.
 */
void tcp_cork(int sk, bool on)
{
	int flag = 0;

	if (on)
		flag = 1;

	(void)setsockopt(sk, SOL_TCP, TCP_CORK, &flag, sizeof(flag));
}
/*
 * Switch the TCP_NODELAY option of socket @sk on (@on is true) or
 * off (@on is false).
 *
 * The setsockopt() result is deliberately discarded, so the caller
 * is not told when the option could not be changed.
 */
void tcp_nodelay(int sk, bool on)
{
	int flag = 0;

	if (on)
		flag = 1;

	(void)setsockopt(sk, SOL_TCP, TCP_NODELAY, &flag, sizeof(flag));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment