Commit a1ed9e09 authored by Mike Rapoport, committed by Andrei Vagin

criu: pagemap: add entries for zero pages

The pages that are mapped to zero_page_pfn are not dumped, but information
about where they are located is required for lazy restore.
Note that get_pagemap users presumed that zero pages were not a part of the
pagemap, and these pages were just silently skipped during memory restore.
At the moment I preserve these semantics and force get_pagemap to skip zero
pages.
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent b2dafe74
...@@ -101,6 +101,9 @@ struct page_pipe_buf { ...@@ -101,6 +101,9 @@ struct page_pipe_buf {
struct list_head l; /* links into page_pipe->bufs */ struct list_head l; /* links into page_pipe->bufs */
}; };
#define PP_HOLE_PARENT (1 << 0)
#define PP_HOLE_ZERO (1 << 1)
struct page_pipe { struct page_pipe {
unsigned int nr_pipes; /* how many page_pipe_bufs in there */ unsigned int nr_pipes; /* how many page_pipe_bufs in there */
struct list_head bufs; /* list of bufs */ struct list_head bufs; /* list of bufs */
...@@ -113,6 +116,7 @@ struct page_pipe { ...@@ -113,6 +116,7 @@ struct page_pipe {
unsigned int nr_holes; /* number of holes allocated */ unsigned int nr_holes; /* number of holes allocated */
unsigned int free_hole; /* number of holes in use */ unsigned int free_hole; /* number of holes in use */
struct iovec *holes; /* holes */ struct iovec *holes; /* holes */
unsigned int *hole_flags;
unsigned flags; /* PP_FOO flags below */ unsigned flags; /* PP_FOO flags below */
}; };
...@@ -124,7 +128,8 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs, uns ...@@ -124,7 +128,8 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs, uns
extern void destroy_page_pipe(struct page_pipe *p); extern void destroy_page_pipe(struct page_pipe *p);
extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr, extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr,
unsigned int flags); unsigned int flags);
extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr); extern int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr,
unsigned int flags);
extern void debug_show_page_pipe(struct page_pipe *pp); extern void debug_show_page_pipe(struct page_pipe *pp);
void page_pipe_reinit(struct page_pipe *pp); void page_pipe_reinit(struct page_pipe *pp);
......
...@@ -16,7 +16,7 @@ struct page_xfer { ...@@ -16,7 +16,7 @@ struct page_xfer {
/* transfers pages related to previous pagemap */ /* transfers pages related to previous pagemap */
int (*write_pages)(struct page_xfer *self, int pipe, unsigned long len); int (*write_pages)(struct page_xfer *self, int pipe, unsigned long len);
/* transfers one hole -- vaddr:len entry w/o pages */ /* transfers one hole -- vaddr:len entry w/o pages */
int (*write_hole)(struct page_xfer *self, struct iovec *iov); int (*write_hole)(struct page_xfer *self, struct iovec *iov, int type);
void (*close)(struct page_xfer *self); void (*close)(struct page_xfer *self);
/* private data for every page-xfer engine */ /* private data for every page-xfer engine */
......
...@@ -25,6 +25,7 @@ enum { ...@@ -25,6 +25,7 @@ enum {
CNT_PAGES_SCANNED, CNT_PAGES_SCANNED,
CNT_PAGES_SKIPPED_PARENT, CNT_PAGES_SKIPPED_PARENT,
CNT_PAGES_WRITTEN, CNT_PAGES_WRITTEN,
CNT_PAGES_ZERO,
DUMP_CNT_NR_STATS, DUMP_CNT_NR_STATS,
}; };
......
...@@ -109,14 +109,17 @@ bool should_dump_page(VmaEntry *vmae, u64 pme) ...@@ -109,14 +109,17 @@ bool should_dump_page(VmaEntry *vmae, u64 pme)
return false; return false;
if (vma_entry_is(vmae, VMA_AREA_AIORING)) if (vma_entry_is(vmae, VMA_AREA_AIORING))
return true; return true;
if (pme & PME_SWAP) if (pme & (PME_PRESENT | PME_SWAP))
return true;
if ((pme & PME_PRESENT) && ((pme & PME_PFRAME_MASK) != kdat.zero_page_pfn))
return true; return true;
return false; return false;
} }
/*
 * Check a pagemap entry against the zero page.
 *
 * Masks @pme with PME_PFRAME_MASK and compares the result with
 * kdat.zero_page_pfn. Returns true when they are equal, false otherwise.
 * No other bits of @pme are examined here.
 */
static inline bool page_is_zero(u64 pme)
{
	if ((pme & PME_PFRAME_MASK) != kdat.zero_page_pfn)
		return false;

	return true;
}
bool page_in_parent(bool dirty) bool page_in_parent(bool dirty)
{ {
/* /*
...@@ -140,7 +143,7 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u ...@@ -140,7 +143,7 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
{ {
u64 *at = &map[PAGE_PFN(*off)]; u64 *at = &map[PAGE_PFN(*off)];
unsigned long pfn, nr_to_scan; unsigned long pfn, nr_to_scan;
unsigned long pages[2] = {}; unsigned long pages[3] = {};
nr_to_scan = (vma_area_len(vma) - *off) / PAGE_SIZE; nr_to_scan = (vma_area_len(vma) - *off) / PAGE_SIZE;
...@@ -164,12 +167,15 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u ...@@ -164,12 +167,15 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
* page. The latter would be checked in page-xfer. * page. The latter would be checked in page-xfer.
*/ */
if (has_parent && page_in_parent(at[pfn] & PME_SOFT_DIRTY)) { if (page_is_zero(at[pfn])) {
ret = page_pipe_add_hole(pp, vaddr); ret = page_pipe_add_hole(pp, vaddr, PP_HOLE_ZERO);
pages[0]++; pages[0]++;
} else if (has_parent && page_in_parent(at[pfn] & PME_SOFT_DIRTY)) {
ret = page_pipe_add_hole(pp, vaddr, PP_HOLE_PARENT);
pages[1]++;
} else { } else {
ret = page_pipe_add_page(pp, vaddr, ppb_flags); ret = page_pipe_add_page(pp, vaddr, ppb_flags);
pages[1]++; pages[2]++;
} }
if (ret) { if (ret) {
...@@ -181,10 +187,12 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u ...@@ -181,10 +187,12 @@ static int generate_iovs(struct vma_area *vma, struct page_pipe *pp, u64 *map, u
*off += pfn * PAGE_SIZE; *off += pfn * PAGE_SIZE;
cnt_add(CNT_PAGES_SCANNED, nr_to_scan); cnt_add(CNT_PAGES_SCANNED, nr_to_scan);
cnt_add(CNT_PAGES_SKIPPED_PARENT, pages[0]); cnt_add(CNT_PAGES_ZERO, pages[0]);
cnt_add(CNT_PAGES_WRITTEN, pages[1]); cnt_add(CNT_PAGES_SKIPPED_PARENT, pages[1]);
cnt_add(CNT_PAGES_WRITTEN, pages[2]);
pr_info("Pagemap generated: %lu pages %lu holes\n", pages[1], pages[0]); pr_info("Pagemap generated: %lu pages %lu holes %lu zeros\n",
pages[2], pages[1], pages[0]);
return 0; return 0;
} }
......
...@@ -269,7 +269,8 @@ int page_pipe_add_page(struct page_pipe *pp, unsigned long addr, ...@@ -269,7 +269,8 @@ int page_pipe_add_page(struct page_pipe *pp, unsigned long addr,
#define PP_HOLES_BATCH 32 #define PP_HOLES_BATCH 32
int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr) int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr,
unsigned int flags)
{ {
if (pp->free_hole >= pp->nr_holes) { if (pp->free_hole >= pp->nr_holes) {
pp->holes = xrealloc(pp->holes, pp->holes = xrealloc(pp->holes,
...@@ -277,15 +278,23 @@ int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr) ...@@ -277,15 +278,23 @@ int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr)
if (!pp->holes) if (!pp->holes)
return -1; return -1;
pp->hole_flags = xrealloc(pp->hole_flags,
(pp->nr_holes + PP_HOLES_BATCH) * sizeof(unsigned int));
if(!pp->hole_flags)
return -1;
pp->nr_holes += PP_HOLES_BATCH; pp->nr_holes += PP_HOLES_BATCH;
} }
if (pp->free_hole && if (pp->free_hole &&
iov_grow_page(&pp->holes[pp->free_hole - 1], addr)) pp->hole_flags[pp->free_hole - 1] == flags &&
iov_grow_page(&pp->holes[pp->free_hole - 1], addr))
goto out; goto out;
iov_init(&pp->holes[pp->free_hole++], addr); iov_init(&pp->holes[pp->free_hole++], addr);
pp->hole_flags[pp->free_hole - 1] = flags;
out: out:
return 0; return 0;
} }
......
...@@ -38,6 +38,7 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov) ...@@ -38,6 +38,7 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov)
#define PS_IOV_OPEN 3 #define PS_IOV_OPEN 3
#define PS_IOV_OPEN2 4 #define PS_IOV_OPEN2 4
#define PS_IOV_PARENT 5 #define PS_IOV_PARENT 5
#define PS_IOV_ZERO 6
#define PS_IOV_FLUSH 0x1023 #define PS_IOV_FLUSH 0x1023
#define PS_IOV_FLUSH_N_CLOSE 0x1024 #define PS_IOV_FLUSH_N_CLOSE 0x1024
...@@ -149,9 +150,10 @@ static int write_pages_to_server(struct page_xfer *xfer, ...@@ -149,9 +150,10 @@ static int write_pages_to_server(struct page_xfer *xfer,
return 0; return 0;
} }
static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov) static int write_hole_to_server(struct page_xfer *xfer, struct iovec *iov,
int type)
{ {
return send_iov(xfer->sk, PS_IOV_HOLE, xfer->dst_id, iov); return send_iov(xfer->sk, type, xfer->dst_id, iov);
} }
static void close_server_xfer(struct page_xfer *xfer) static void close_server_xfer(struct page_xfer *xfer)
...@@ -276,25 +278,36 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov) ...@@ -276,25 +278,36 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov)
} }
} }
static int write_pagehole_loc(struct page_xfer *xfer, struct iovec *iov) static int write_hole_loc(struct page_xfer *xfer, struct iovec *iov, int type)
{ {
PagemapEntry pe = PAGEMAP_ENTRY__INIT; PagemapEntry pe = PAGEMAP_ENTRY__INIT;
if (xfer->parent != NULL) { pe.vaddr = encode_pointer(iov->iov_base);
int ret; pe.nr_pages = iov->iov_len / PAGE_SIZE;
ret = check_pagehole_in_parent(xfer->parent, iov); switch (type) {
if (ret) { case PS_IOV_HOLE:
pr_err("Hole %p/%zu not found in parent\n", if (xfer->parent != NULL) {
iov->iov_base, iov->iov_len); int ret;
return -1;
ret = check_pagehole_in_parent(xfer->parent, iov);
if (ret) {
pr_err("Hole %p/%zu not found in parent\n",
iov->iov_base, iov->iov_len);
return -1;
}
} }
}
pe.vaddr = encode_pointer(iov->iov_base); pe.has_in_parent = true;
pe.nr_pages = iov->iov_len / PAGE_SIZE; pe.in_parent = true;
pe.has_in_parent = true; break;
pe.in_parent = true; case PS_IOV_ZERO:
pe.has_zero = true;
pe.zero = true;
break;
default:
return -1;
}
if (pb_write_one(xfer->pmi, &pe, PB_PAGEMAP) < 0) if (pb_write_one(xfer->pmi, &pe, PB_PAGEMAP) < 0)
return -1; return -1;
...@@ -364,7 +377,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id) ...@@ -364,7 +377,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
out: out:
xfer->write_pagemap = write_pagemap_loc; xfer->write_pagemap = write_pagemap_loc;
xfer->write_pages = write_pages_loc; xfer->write_pages = write_pages_loc;
xfer->write_hole = write_pagehole_loc; xfer->write_hole = write_hole_loc;
xfer->close = close_page_xfer; xfer->close = close_page_xfer;
return 0; return 0;
} }
...@@ -378,19 +391,33 @@ int open_page_xfer(struct page_xfer *xfer, int fd_type, long id) ...@@ -378,19 +391,33 @@ int open_page_xfer(struct page_xfer *xfer, int fd_type, long id)
} }
static int page_xfer_dump_hole(struct page_xfer *xfer, static int page_xfer_dump_hole(struct page_xfer *xfer,
struct iovec *hole, unsigned long off) struct iovec *hole, unsigned long off, int type)
{ {
BUG_ON(hole->iov_base < (void *)off); BUG_ON(hole->iov_base < (void *)off);
hole->iov_base -= off; hole->iov_base -= off;
pr_debug("\th %p [%u]\n", hole->iov_base, pr_debug("\th %p [%u]\n", hole->iov_base,
(unsigned int)(hole->iov_len / PAGE_SIZE)); (unsigned int)(hole->iov_len / PAGE_SIZE));
if (xfer->write_hole(xfer, hole)) if (xfer->write_hole(xfer, hole, type))
return -1; return -1;
return 0; return 0;
} }
/*
 * Map the flag stored for hole @n of @pp to the PS_IOV_* value that is
 * passed on as the hole type.
 *
 * The stored value is compared for equality, not tested bit by bit:
 *   PP_HOLE_PARENT -> PS_IOV_HOLE
 *   PP_HOLE_ZERO   -> PS_IOV_ZERO
 * Any other value (including a combination of both flags) hits BUG();
 * the trailing -1 is returned only if BUG() comes back.
 */
static int get_hole_type(struct page_pipe *pp, int n)
{
	switch (pp->hole_flags[n]) {
	case PP_HOLE_PARENT:
		return PS_IOV_HOLE;
	case PP_HOLE_ZERO:
		return PS_IOV_ZERO;
	default:
		BUG();
	}

	return -1;
}
static int dump_holes(struct page_xfer *xfer, struct page_pipe *pp, static int dump_holes(struct page_xfer *xfer, struct page_pipe *pp,
unsigned int *cur_hole, void *limit, unsigned long off) unsigned int *cur_hole, void *limit, unsigned long off)
{ {
...@@ -398,11 +425,12 @@ static int dump_holes(struct page_xfer *xfer, struct page_pipe *pp, ...@@ -398,11 +425,12 @@ static int dump_holes(struct page_xfer *xfer, struct page_pipe *pp,
for (; *cur_hole < pp->free_hole ; (*cur_hole)++) { for (; *cur_hole < pp->free_hole ; (*cur_hole)++) {
struct iovec hole = pp->holes[*cur_hole]; struct iovec hole = pp->holes[*cur_hole];
int hole_type = get_hole_type(pp, *cur_hole);
if (limit && hole.iov_base >= limit) if (limit && hole.iov_base >= limit)
break; break;
ret = page_xfer_dump_hole(xfer, &hole, off); ret = page_xfer_dump_hole(xfer, &hole, off, hole_type);
if (ret) if (ret)
return ret; return ret;
} }
...@@ -655,7 +683,7 @@ static int page_server_hole(int sk, struct page_server_iov *pi) ...@@ -655,7 +683,7 @@ static int page_server_hole(int sk, struct page_server_iov *pi)
return -1; return -1;
psi2iovec(pi, &iov); psi2iovec(pi, &iov);
if (lxfer->write_hole(lxfer, &iov)) if (lxfer->write_hole(lxfer, &iov, pi->cmd))
return -1; return -1;
return 0; return 0;
...@@ -711,6 +739,7 @@ static int page_server_serve(int sk) ...@@ -711,6 +739,7 @@ static int page_server_serve(int sk)
ret = page_server_add(sk, &pi); ret = page_server_add(sk, &pi);
break; break;
case PS_IOV_HOLE: case PS_IOV_HOLE:
case PS_IOV_ZERO:
ret = page_server_hole(sk, &pi); ret = page_server_hole(sk, &pi);
break; break;
case PS_IOV_FLUSH: case PS_IOV_FLUSH:
......
...@@ -123,13 +123,19 @@ int dedup_one_iovec(struct page_read *pr, unsigned long off, unsigned long len) ...@@ -123,13 +123,19 @@ int dedup_one_iovec(struct page_read *pr, unsigned long off, unsigned long len)
static int advance(struct page_read *pr) static int advance(struct page_read *pr)
{ {
pr->curr_pme++; for (;;) {
if (pr->curr_pme >= pr->nr_pmes) pr->curr_pme++;
return 0; if (pr->curr_pme >= pr->nr_pmes)
return 0;
pe = pr->pmes[pr->curr_pme];
if (!pe->zero)
break;
}
pr->pe = pr->pmes[pr->curr_pme]; pr->pe = pr->pmes[pr->curr_pme];
pr->cvaddr = pr->pe->vaddr; pr->cvaddr = pr->pe->vaddr;
return 1; return 1;
} }
...@@ -138,7 +144,7 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len) ...@@ -138,7 +144,7 @@ static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
if (!len) if (!len)
return; return;
if (!pr->pe->in_parent) if (!pr->pe->in_parent && !pr->pe->zero)
pr->pi_off += len; pr->pi_off += len;
pr->cvaddr += len; pr->cvaddr += len;
} }
...@@ -412,6 +418,9 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, ...@@ -412,6 +418,9 @@ static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr,
if (pr->pe->in_parent) { if (pr->pe->in_parent) {
if (read_parent_page(pr, vaddr, nr, buf, flags) < 0) if (read_parent_page(pr, vaddr, nr, buf, flags) < 0)
return -1; return -1;
} else if (pr->pe->zero) {
/* zero mappings should be skipped by get_pagemap */
BUG();
} else { } else {
if (maybe_read_page(pr, vaddr, nr, buf, flags) < 0) if (maybe_read_page(pr, vaddr, nr, buf, flags) < 0)
return -1; return -1;
......
...@@ -218,6 +218,8 @@ static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma) ...@@ -218,6 +218,8 @@ static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma)
shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE); shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE);
if (map[vma_pfn] & PME_SOFT_DIRTY) if (map[vma_pfn] & PME_SOFT_DIRTY)
set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY); set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY);
else if (page_is_zero(map[vma_pfn]))
set_pstate(si->pstate_map, shmem_pfn, PST_ZERO);
else else
set_pstate(si->pstate_map, shmem_pfn, PST_DUMP); set_pstate(si->pstate_map, shmem_pfn, PST_DUMP);
} }
...@@ -709,9 +711,9 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si) ...@@ -709,9 +711,9 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si)
pgaddr = (unsigned long)addr + pfn * PAGE_SIZE; pgaddr = (unsigned long)addr + pfn * PAGE_SIZE;
again: again:
if (pgstate == PST_ZERO) if (pgstate == PST_ZERO)
ret = 0; ret = page_pipe_add_hole(pp, pgaddr, PP_HOLE_ZERO);
else if (xfer.parent && page_in_parent(pgstate == PST_DIRTY)) else if (xfer.parent && page_in_parent(pgstate == PST_DIRTY))
ret = page_pipe_add_hole(pp, pgaddr); ret = page_pipe_add_hole(pp, pgaddr, PP_HOLE_PARENT);
else else
ret = page_pipe_add_page(pp, pgaddr, 0); ret = page_pipe_add_page(pp, pgaddr, 0);
......
...@@ -157,6 +157,7 @@ void write_stats(int what) ...@@ -157,6 +157,7 @@ void write_stats(int what)
ds_entry.pages_scanned = dstats->counts[CNT_PAGES_SCANNED]; ds_entry.pages_scanned = dstats->counts[CNT_PAGES_SCANNED];
ds_entry.pages_skipped_parent = dstats->counts[CNT_PAGES_SKIPPED_PARENT]; ds_entry.pages_skipped_parent = dstats->counts[CNT_PAGES_SKIPPED_PARENT];
ds_entry.pages_written = dstats->counts[CNT_PAGES_WRITTEN]; ds_entry.pages_written = dstats->counts[CNT_PAGES_WRITTEN];
ds_entry.pages_zero = dstats->counts[CNT_PAGES_ZERO];
name = "dump"; name = "dump";
} else if (what == RESTORE_STATS) { } else if (what == RESTORE_STATS) {
......
...@@ -10,4 +10,5 @@ message pagemap_entry { ...@@ -10,4 +10,5 @@ message pagemap_entry {
required uint64 vaddr = 1 [(criu).hex = true]; required uint64 vaddr = 1 [(criu).hex = true];
required uint32 nr_pages = 2; required uint32 nr_pages = 2;
optional bool in_parent = 3; optional bool in_parent = 3;
optional bool zero = 4;
} }
...@@ -12,6 +12,8 @@ message dump_stats_entry { ...@@ -12,6 +12,8 @@ message dump_stats_entry {
required uint64 pages_written = 7; required uint64 pages_written = 7;
optional uint32 irmap_resolve = 8; optional uint32 irmap_resolve = 8;
required uint64 pages_zero = 9;
} }
message restore_stats_entry { message restore_stats_entry {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment