Commit 9982fc9e authored by Mike Rapoport, committed by Andrei Vagin

criu: page-xfer: add PS_IOV_GET interface

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent a9a973da
...@@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void); ...@@ -44,4 +44,6 @@ extern int disconnect_from_page_server(void);
extern int check_parent_page_xfer(int fd_type, long id); extern int check_parent_page_xfer(int fd_type, long id);
extern int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest);
#endif /* __CR_PAGE_XFER__H__ */ #endif /* __CR_PAGE_XFER__H__ */
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include "protobuf.h" #include "protobuf.h"
#include "images/pagemap.pb-c.h" #include "images/pagemap.pb-c.h"
#include "fcntl.h" #include "fcntl.h"
#include "pstree.h"
#include "parasite-syscall.h"
static int page_server_sk = -1; static int page_server_sk = -1;
...@@ -40,6 +42,7 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov) ...@@ -40,6 +42,7 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov)
#define PS_IOV_PARENT 5 #define PS_IOV_PARENT 5
#define PS_IOV_ZERO 6 #define PS_IOV_ZERO 6
#define PS_IOV_LAZY 7 #define PS_IOV_LAZY 7
#define PS_IOV_GET 8
#define PS_IOV_FLUSH 0x1023 #define PS_IOV_FLUSH 0x1023
#define PS_IOV_FLUSH_N_CLOSE 0x1024 #define PS_IOV_FLUSH_N_CLOSE 0x1024
...@@ -700,26 +703,72 @@ static int page_server_hole(int sk, struct page_server_iov *pi) ...@@ -700,26 +703,72 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
return 0; return 0;
} }
/*
 * Handle a PS_IOV_GET request: send the pages described by @pi back over @sk.
 *
 * @sk: connected page-server socket the request arrived on.
 * @pi: the request; pi->dst_id selects the task, pi->vaddr and
 *      pi->nr_pages describe the wanted range. pi->nr_pages is passed to
 *      page_pipe_split() by pointer and is re-read afterwards.
 *
 * Reply format: a PS_IOV_ZERO header when no pages are found, otherwise a
 * PS_IOV_ADD header followed by the raw page data.
 *
 * Returns 0 on success, a non-zero value on failure.
 */
static int page_server_get_pages(int sk, struct page_server_iov *pi)
{
	struct pstree_item *item;
	struct page_pipe *pp;
	struct page_pipe_buf *ppb;
	struct iovec *iov;
	size_t left;
	int ret;

	/*
	 * dst_id is supplied by the peer, so do not dereference the lookup
	 * result blindly.
	 * NOTE(review): assumes pstree_item_by_virt() returns NULL for an
	 * unknown id -- confirm.
	 */
	item = pstree_item_by_virt(pi->dst_id);
	if (!item) {
		pr_err("No task %ld to get pages from\n", (long)pi->dst_id);
		return -1;
	}
	pp = dmpi(item)->mem_pp;

	ret = page_pipe_split(pp, pi->vaddr, &pi->nr_pages);
	if (ret)
		return ret;

	if (pi->nr_pages == 0) {
		/* no iovs found means we've hit a zero page */
		pr_debug("no iovs found, zero pages\n");
		return send_psi(sk, PS_IOV_ZERO, 0, 0, 0);
	}

	/*
	 * After the split the requested range is expected to be exactly the
	 * first iovec of the first buffer in the pipe; the checks below
	 * enforce that.
	 */
	ppb = list_first_entry(&pp->bufs, struct page_pipe_buf, l);
	iov = &ppb->iov[0];

	BUG_ON(!(ppb->flags & PPB_LAZY));
	BUG_ON(iov->iov_len != pi->nr_pages * PAGE_SIZE);
	BUG_ON(pi->vaddr != encode_pointer(iov->iov_base));

	if (send_psi(sk, PS_IOV_ADD, pi->nr_pages, pi->vaddr, pi->dst_id))
		return -1;

	/*
	 * splice() may move fewer bytes than requested. The header has
	 * already been sent, so keep going until the whole payload is out
	 * instead of bailing out with a half-written reply.
	 */
	left = iov->iov_len;
	while (left) {
		ssize_t moved;

		moved = splice(ppb->p[0], NULL, sk, NULL, left, SPLICE_F_MOVE);
		if (moved < 0) {
			pr_perror("Can't splice pages to the socket");
			return -1;
		}
		if (moved == 0) {
			pr_err("Page pipe drained with %zu bytes left to send\n", left);
			return -1;
		}

		left -= (size_t)moved;
	}

	/*
	 * NOTE(review): presumably pushes out the reply held back by the
	 * tcp_cork() that page_server_serve() sets in lazy mode -- confirm.
	 */
	tcp_nodelay(sk, true);

	page_pipe_destroy_ppb(ppb);

	return 0;
}
static int page_server_serve(int sk) static int page_server_serve(int sk)
{ {
int ret = -1; int ret = -1;
bool flushed = false; bool flushed = false;
/* if (!opts.lazy_pages) {
* This socket only accepts data except one thing -- it /*
* writes back the has_parent bit from time to time, so * This socket only accepts data except one thing -- it
* make it NODELAY all the time. * writes back the has_parent bit from time to time, so
*/ * make it NODELAY all the time.
tcp_nodelay(sk, true); */
tcp_nodelay(sk, true);
if (pipe(cxfer.p)) { if (pipe(cxfer.p)) {
pr_perror("Can't make pipe for xfer"); pr_perror("Can't make pipe for xfer");
close(sk); close(sk);
return -1; return -1;
} }
cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0); cxfer.pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size); pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);
} else {
tcp_cork(sk, true);
}
while (1) { while (1) {
struct page_server_iov pi; struct page_server_iov pi;
...@@ -773,6 +822,10 @@ static int page_server_serve(int sk) ...@@ -773,6 +822,10 @@ static int page_server_serve(int sk)
flushed = true; flushed = true;
break; break;
} }
case PS_IOV_GET:
flushed = true;
ret = page_server_get_pages(sk, &pi);
break;
default: default:
pr_err("Unknown command %u\n", pi.cmd); pr_err("Unknown command %u\n", pi.cmd);
ret = -1; ret = -1;
...@@ -815,7 +868,8 @@ int cr_page_server(bool daemon_mode, int cfd) ...@@ -815,7 +868,8 @@ int cr_page_server(bool daemon_mode, int cfd)
int sk = -1; int sk = -1;
int ret; int ret;
up_page_ids_base(); if (!opts.lazy_pages)
up_page_ids_base();
if (opts.ps_socket != -1) { if (opts.ps_socket != -1) {
ret = 0; ret = 0;
...@@ -905,3 +959,32 @@ out: ...@@ -905,3 +959,32 @@ out:
close_safe(&page_server_sk); close_safe(&page_server_sk);
return ret ? : status; return ret ? : status;
} }
int get_remote_pages(int pid, unsigned long addr, int nr_pages, void *dest)
{
int ret;
struct page_server_iov pi;
if (send_psi(page_server_sk, PS_IOV_GET, nr_pages, addr, pid))
return -1;
tcp_nodelay(page_server_sk, true);
ret = recv(page_server_sk, &pi, sizeof(pi), MSG_WAITALL);
if (ret != sizeof(pi))
return -1;
/* zero page */
if (pi.cmd == PS_IOV_ZERO)
return 0;
if (pi.nr_pages > nr_pages)
return -1;
ret = recv(page_server_sk, dest, PAGE_SIZE, MSG_WAITALL);
if (ret != PAGE_SIZE)
return -1;
return 1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment