Commit 5ad71a89 authored by Mike Rapoport, committed by Andrei Vagin

criu: lazy-pages: enable remoting of lazy pages

The remote lazy pages variant can be run as follows:

src# criu dump -t <pid> --lazy-pages --port 9876 -D /tmp/1 &
src# while ! sudo fuser 9876/tcp ; do sleep 1; done
src# scp -r /tmp/1/ dst:/tmp/

dst# criu lazy-pages --page-server --address dst --port 9876 -D /tmp/1 &
dst# criu restore --lazy-pages -D /tmp/1

In a nutshell, this implementation of remote lazy pages does the following:

- dump collects the process memory into the pipes, transfers non-lazy pages
  to the images or to the page-server on the restore side. The lazy pages
  are kept in pipes for later transfer
- when the dump creates the page_pipe_bufs, it marks the buffers containing
potentially lazy pages with PPB_LAZY
- at the dump_finish stage, the dump side starts a TCP server that will
handle page requests from the restore side
- the checkpoint directory is transferred to the restore side
- on the restore side the lazy-pages daemon is started; it creates a UNIX socket
to receive uffds from the restore and a TCP socket to forward page
requests to the dump side
- restore creates memory mappings and fills the VMAs that cannot be handled
by uffd with the contents of the pages*img.
- restore registers lazy VMAs with uffd and sends the userfault file
descriptors to the lazy-pages daemon
- when a #PF occurs, the lazy-pages daemon sends a PS_IOV_GET command to the dump
side; the command contains the PID, the faulting address and the number of pages
(always 1 at the moment)
- the dump side extracts the requested pages from the pipe and splices them
into the TCP socket.
- the lazy-pages daemon copies the received pages into the restored process
address space
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 9982fc9e
...@@ -1187,6 +1187,7 @@ static int pre_dump_one_task(struct pstree_item *item) ...@@ -1187,6 +1187,7 @@ static int pre_dump_one_task(struct pstree_item *item)
item->pid->ns[0].virt = misc.pid; item->pid->ns[0].virt = misc.pid;
mdc.pre_dump = true; mdc.pre_dump = true;
mdc.lazy = false;
ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl);
if (ret) if (ret)
...@@ -1346,6 +1347,7 @@ static int dump_one_task(struct pstree_item *item) ...@@ -1346,6 +1347,7 @@ static int dump_one_task(struct pstree_item *item)
} }
mdc.pre_dump = false; mdc.pre_dump = false;
mdc.lazy = opts.lazy_pages;
ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl);
if (ret) if (ret)
...@@ -1387,7 +1389,10 @@ static int dump_one_task(struct pstree_item *item) ...@@ -1387,7 +1389,10 @@ static int dump_one_task(struct pstree_item *item)
goto err; goto err;
} }
ret = compel_cure(parasite_ctl); if (opts.lazy_pages)
ret = compel_cure_remote(parasite_ctl);
else
ret = compel_cure(parasite_ctl);
if (ret) { if (ret) {
pr_err("Can't cure (pid: %d) from parasite\n", pid); pr_err("Can't cure (pid: %d) from parasite\n", pid);
goto err; goto err;
...@@ -1591,6 +1596,27 @@ err: ...@@ -1591,6 +1596,27 @@ err:
return cr_pre_dump_finish(ret); return cr_pre_dump_finish(ret);
} }
/*
 * Serve the lazy pages that were kept in the per-task page pipes.
 *
 * Runs the page server through cr_page_server(false, -1). Once it
 * returns, every pstree item has its saved page pipe
 * (dmpi(item)->mem_pp) destroyed and compel_cure_local() called on its
 * parasite control block. This cleanup runs whether or not the page
 * server succeeded.
 *
 * Returns the result of cr_page_server(): zero on success, non-zero on
 * failure. A failing compel_cure_local() is reported in the log but
 * does not change the return value.
 */
static int cr_lazy_mem_dump(void)
{
	struct pstree_item *item;
	int ret;

	pr_info("Starting lazy pages server\n");
	ret = cr_page_server(false, -1);

	for_each_pstree_item(item) {
		destroy_page_pipe(dmpi(item)->mem_pp);
		/* Keep cleaning the remaining tasks even if one cure fails. */
		if (compel_cure_local(dmpi(item)->parasite_ctl))
			pr_err("Can't cure local: something happened with mapping?\n");
	}

	if (ret)
		pr_err("Lazy pages transfer FAILED.\n");
	else
		pr_info("Lazy pages transfer finished successfully\n");

	return ret;
}
static int cr_dump_finish(int ret) static int cr_dump_finish(int ret)
{ {
int post_dump_ret = 0; int post_dump_ret = 0;
...@@ -1650,6 +1676,10 @@ static int cr_dump_finish(int ret) ...@@ -1650,6 +1676,10 @@ static int cr_dump_finish(int ret)
delete_link_remaps(); delete_link_remaps();
clean_cr_time_mounts(); clean_cr_time_mounts();
} }
if (opts.lazy_pages)
ret = cr_lazy_mem_dump();
arch_set_thread_regs(root_item); arch_set_thread_regs(root_item);
pstree_switch_state(root_item, pstree_switch_state(root_item,
(ret || post_dump_ret) ? (ret || post_dump_ret) ?
......
...@@ -13,6 +13,7 @@ struct vma_area; ...@@ -13,6 +13,7 @@ struct vma_area;
struct mem_dump_ctl { struct mem_dump_ctl {
bool pre_dump; bool pre_dump;
bool lazy;
}; };
extern bool vma_has_guard_gap_hidden(struct vma_area *vma); extern bool vma_has_guard_gap_hidden(struct vma_area *vma);
......
...@@ -267,7 +267,7 @@ static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl, ...@@ -267,7 +267,7 @@ static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
return 0; return 0;
} }
static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer) static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer, bool lazy)
{ {
int ret; int ret;
...@@ -276,7 +276,7 @@ static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer) ...@@ -276,7 +276,7 @@ static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
* pre-dump action (see pre_dump_one_task) * pre-dump action (see pre_dump_one_task)
*/ */
timing_start(TIME_MEMWRITE); timing_start(TIME_MEMWRITE);
ret = page_xfer_dump_pages(xfer, pp, 0, true); ret = page_xfer_dump_pages(xfer, pp, 0, !lazy);
timing_stop(TIME_MEMWRITE); timing_stop(TIME_MEMWRITE);
return ret; return ret;
...@@ -316,7 +316,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, ...@@ -316,7 +316,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
return -1; return -1;
ret = -1; ret = -1;
if (!mdc->pre_dump) if (!(mdc->pre_dump || mdc->lazy))
/* /*
* Chunk mode pushes pages portion by portion. This mode * Chunk mode pushes pages portion by portion. This mode
* only works when we don't need to keep pp for later * only works when we don't need to keep pp for later
...@@ -324,7 +324,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, ...@@ -324,7 +324,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
*/ */
cpp_flags |= PP_CHUNK_MODE; cpp_flags |= PP_CHUNK_MODE;
pp = create_page_pipe(vma_area_list->priv_size, pp = create_page_pipe(vma_area_list->priv_size,
pargs_iovs(args), cpp_flags); mdc->lazy ? NULL : pargs_iovs(args),
cpp_flags);
if (!pp) if (!pp)
goto out; goto out;
...@@ -378,7 +379,7 @@ again: ...@@ -378,7 +379,7 @@ again:
ret = drain_pages(pp, ctl, args); ret = drain_pages(pp, ctl, args);
if (!ret) if (!ret)
ret = xfer_pages(pp, &xfer); ret = xfer_pages(pp, &xfer, mdc->lazy /* false actually */);
if (!ret) { if (!ret) {
page_pipe_reinit(pp); page_pipe_reinit(pp);
goto again; goto again;
...@@ -389,9 +390,12 @@ again: ...@@ -389,9 +390,12 @@ again:
goto out_xfer; goto out_xfer;
} }
if (mdc->lazy)
memcpy(pargs_iovs(args), pp->iovs,
sizeof(struct iovec) * pp->nr_iovs);
ret = drain_pages(pp, ctl, args); ret = drain_pages(pp, ctl, args);
if (!ret && !mdc->pre_dump) if (!ret && !mdc->pre_dump)
ret = xfer_pages(pp, &xfer); ret = xfer_pages(pp, &xfer, mdc->lazy);
if (ret) if (ret)
goto out_xfer; goto out_xfer;
...@@ -406,7 +410,7 @@ out_xfer: ...@@ -406,7 +410,7 @@ out_xfer:
if (!mdc->pre_dump) if (!mdc->pre_dump)
xfer.close(&xfer); xfer.close(&xfer);
out_pp: out_pp:
if (ret || !mdc->pre_dump) if (ret || !(mdc->pre_dump || mdc->lazy))
destroy_page_pipe(pp); destroy_page_pipe(pp);
else else
dmpi(item)->mem_pp = pp; dmpi(item)->mem_pp = pp;
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "xmalloc.h" #include "xmalloc.h"
#include <compel/plugins/std/syscall-codes.h> #include <compel/plugins/std/syscall-codes.h>
#include "restorer.h" #include "restorer.h"
#include "page-xfer.h"
#undef LOG_PREFIX #undef LOG_PREFIX
#define LOG_PREFIX "lazy-pages: " #define LOG_PREFIX "lazy-pages: "
...@@ -366,7 +367,10 @@ static int uffd_copy_page(struct lazy_pages_info *lpi, __u64 address, ...@@ -366,7 +367,10 @@ static int uffd_copy_page(struct lazy_pages_info *lpi, __u64 address,
struct uffdio_copy uffdio_copy; struct uffdio_copy uffdio_copy;
int rc; int rc;
rc = get_page(lpi, address, dest); if (opts.use_page_server)
rc = get_remote_pages(lpi->pid, address, 1, dest);
else
rc = get_page(lpi, address, dest);
if (rc <= 0) if (rc <= 0)
return rc; return rc;
...@@ -854,6 +858,9 @@ int cr_lazy_pages() ...@@ -854,6 +858,9 @@ int cr_lazy_pages()
if (prepare_uffds(epollfd)) if (prepare_uffds(epollfd))
return -1; return -1;
if (connect_to_page_server())
return -1;
ret = handle_requests(epollfd, events); ret = handle_requests(epollfd, events);
lpi_hash_fini(); lpi_hash_fini();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment