Commit bb98a820, authored by Andrey Vagin, committed by Pavel Emelyanov

page-pipe: split dumping memory on chunks (v3)

The problem is that vmsplice() to a big pipe fails very often.

The kernel allocates a linear chunk of memory for the pipe buffer
descriptors, and a big allocation in the kernel can fail.

So we need to restrict the maximum capacity of pipes. But the number of
pipes is restricted too, so we need to split the memory dump into chunks.

This patch calculates the pipe size for which vmsplice() will not
fail.

v2: s/batch/chunk and a few other small fixes
v3: Remove callbacks from page_pipes and reuse pipes
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent c92c3079
...@@ -81,6 +81,7 @@ struct page_pipe_buf { ...@@ -81,6 +81,7 @@ struct page_pipe_buf {
struct page_pipe { struct page_pipe {
unsigned int nr_pipes; /* how many page_pipe_bufs in there */ unsigned int nr_pipes; /* how many page_pipe_bufs in there */
struct list_head bufs; /* list of bufs */ struct list_head bufs; /* list of bufs */
struct list_head free_bufs; /* list of bufs */
unsigned int nr_iovs; /* number of iovs */ unsigned int nr_iovs; /* number of iovs */
unsigned int free_iov; /* first free iov */ unsigned int free_iov; /* first free iov */
struct iovec *iovs; /* iovs. They are provided into create_page_pipe struct iovec *iovs; /* iovs. They are provided into create_page_pipe
...@@ -89,13 +90,18 @@ struct page_pipe { ...@@ -89,13 +90,18 @@ struct page_pipe {
unsigned int nr_holes; /* number of holes allocated */ unsigned int nr_holes; /* number of holes allocated */
unsigned int free_hole; /* number of holes in use */ unsigned int free_hole; /* number of holes in use */
struct iovec *holes; /* holes */ struct iovec *holes; /* holes */
bool chunk_mode; /* Restrict the maximum buffer size of pipes
and dump memory for a few iterations */
}; };
extern struct page_pipe *create_page_pipe(unsigned int nr, struct iovec *); extern struct page_pipe *create_page_pipe(unsigned int nr,
struct iovec *, bool chunk_mode);
extern void destroy_page_pipe(struct page_pipe *p); extern void destroy_page_pipe(struct page_pipe *p);
extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr); extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr);
extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr); extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr);
extern void debug_show_page_pipe(struct page_pipe *pp); extern void debug_show_page_pipe(struct page_pipe *pp);
void page_pipe_reinit(struct page_pipe *pp);
#endif /* __CR_PAGE_PIPE_H__ */ #endif /* __CR_PAGE_PIPE_H__ */
...@@ -220,7 +220,7 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl, ...@@ -220,7 +220,7 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
goto out_free; goto out_free;
ret = -1; ret = -1;
pp = create_page_pipe(vma_area_list->priv_size / 2, pargs_iovs(args)); pp = create_page_pipe(vma_area_list->priv_size / 2, pargs_iovs(args), false);
if (!pp) if (!pp)
goto out_close; goto out_close;
......
...@@ -7,12 +7,24 @@ ...@@ -7,12 +7,24 @@
#include "util.h" #include "util.h"
#include "page-pipe.h" #include "page-pipe.h"
/* The number of pipes for one chunk */
#define NR_PIPES_PER_CHUNK 8
static int page_pipe_grow(struct page_pipe *pp) static int page_pipe_grow(struct page_pipe *pp)
{ {
struct page_pipe_buf *ppb; struct page_pipe_buf *ppb;
pr_debug("Will grow page pipe (iov off is %u)\n", pp->free_iov); pr_debug("Will grow page pipe (iov off is %u)\n", pp->free_iov);
if (!list_empty(&pp->free_bufs)) {
ppb = list_first_entry(&pp->free_bufs, struct page_pipe_buf, l);
list_move_tail(&ppb->l, &pp->bufs);
goto out;
}
if (pp->chunk_mode && pp->nr_pipes == NR_PIPES_PER_CHUNK)
return -EAGAIN;
ppb = xmalloc(sizeof(*ppb)); ppb = xmalloc(sizeof(*ppb));
if (!ppb) if (!ppb)
return -1; return -1;
...@@ -24,17 +36,20 @@ static int page_pipe_grow(struct page_pipe *pp) ...@@ -24,17 +36,20 @@ static int page_pipe_grow(struct page_pipe *pp)
} }
ppb->pipe_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0) / PAGE_SIZE; ppb->pipe_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0) / PAGE_SIZE;
list_add_tail(&ppb->l, &pp->bufs);
out:
ppb->pages_in = 0; ppb->pages_in = 0;
ppb->nr_segs = 0; ppb->nr_segs = 0;
ppb->iov = &pp->iovs[pp->free_iov]; ppb->iov = &pp->iovs[pp->free_iov];
list_add_tail(&ppb->l, &pp->bufs);
pp->nr_pipes++; pp->nr_pipes++;
return 0; return 0;
} }
struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs) struct page_pipe *create_page_pipe(unsigned int nr_segs,
struct iovec *iovs, bool chunk_mode)
{ {
struct page_pipe *pp; struct page_pipe *pp;
...@@ -44,6 +59,7 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs) ...@@ -44,6 +59,7 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs)
if (pp) { if (pp) {
pp->nr_pipes = 0; pp->nr_pipes = 0;
INIT_LIST_HEAD(&pp->bufs); INIT_LIST_HEAD(&pp->bufs);
INIT_LIST_HEAD(&pp->free_bufs);
pp->nr_iovs = nr_segs; pp->nr_iovs = nr_segs;
pp->iovs = iovs; pp->iovs = iovs;
pp->free_iov = 0; pp->free_iov = 0;
...@@ -52,6 +68,8 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs) ...@@ -52,6 +68,8 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs)
pp->free_hole = 0; pp->free_hole = 0;
pp->holes = NULL; pp->holes = NULL;
pp->chunk_mode = chunk_mode;
if (page_pipe_grow(pp)) if (page_pipe_grow(pp))
return NULL; return NULL;
} }
...@@ -65,6 +83,7 @@ void destroy_page_pipe(struct page_pipe *pp) ...@@ -65,6 +83,7 @@ void destroy_page_pipe(struct page_pipe *pp)
pr_debug("Killing page pipe\n"); pr_debug("Killing page pipe\n");
list_splice(&pp->free_bufs, &pp->bufs);
list_for_each_entry_safe(ppb, n, &pp->bufs, l) { list_for_each_entry_safe(ppb, n, &pp->bufs, l) {
close(ppb->p[0]); close(ppb->p[0]);
close(ppb->p[1]); close(ppb->p[1]);
...@@ -74,6 +93,40 @@ void destroy_page_pipe(struct page_pipe *pp) ...@@ -74,6 +93,40 @@ void destroy_page_pipe(struct page_pipe *pp)
xfree(pp); xfree(pp);
} }
/*
 * page_pipe_reinit - recycle a chunk-mode page pipe for the next chunk.
 *
 * Every buffer on pp->bufs is moved to pp->free_bufs so that the pipes
 * which were already created get reused instead of being re-created, the
 * recorded holes are dropped, and one buffer is taken back through
 * page_pipe_grow().
 *
 * Must only be called on a page pipe created with chunk_mode == true.
 *
 * NOTE(review): pp->free_iov is left untouched here; presumably the
 * caller wants the iov cursor to keep advancing across chunks -- confirm.
 */
void page_pipe_reinit(struct page_pipe *pp)
{
	struct page_pipe_buf *ppb, *n;

	BUG_ON(!pp->chunk_mode);

	pr_debug("Clean up page pipe\n");

	list_for_each_entry_safe(ppb, n, &pp->bufs, l)
		list_move(&ppb->l, &pp->free_bufs);

	/*
	 * page_pipe_grow() bumps nr_pipes even when it hands out a recycled
	 * buffer, so the counter has to restart from zero for each chunk.
	 * Otherwise nr_pipes overshoots NR_PIPES_PER_CHUNK during the second
	 * chunk, the per-chunk limit (an equality test) never triggers again
	 * and new pipes get allocated without bound.
	 */
	pp->nr_pipes = 0;
	pp->free_hole = 0;

	if (page_pipe_grow(pp))
		BUG(); /* It can't fail, because ppb is in free_bufs */
}
#define PAGE_ALLOC_COSTLY_ORDER 3 /* from the kernel source code */
/*
 * Local description of one kernel pipe buffer descriptor.
 *
 * In the code visible here it is only consumed through sizeof() (see
 * PIPE_MAX_SIZE), so what matters is that the member types and their
 * order stay as they are; the pointed-to types are never dereferenced.
 */
struct kernel_pipe_buffer {
	struct page *page;
	unsigned int offset;
	unsigned int len;
	const struct pipe_buf_operations *ops;
	unsigned int flags;
	unsigned long private;
};
/*
 * The kernel allocates one linear chunk of memory for the buffer
 * descriptors of a pipe. Allocations bigger than
 * (1 << PAGE_ALLOC_COSTLY_ORDER) pages fail very often, so the pipe
 * capacity has to be restricted to keep that allocation small.
 *
 * PIPE_MAX_SIZE is the number of kernel_pipe_buffer descriptors that
 * fit into (1 << PAGE_ALLOC_COSTLY_ORDER) pages; try_add_page_to()
 * does not enlarge a pipe beyond this value.
 */
#define PIPE_MAX_SIZE ((1 << PAGE_ALLOC_COSTLY_ORDER) * PAGE_SIZE / \
sizeof(struct kernel_pipe_buffer))
#define PPB_IOV_BATCH 8 #define PPB_IOV_BATCH 8
static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *ppb, static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *ppb,
...@@ -82,9 +135,13 @@ static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *pp ...@@ -82,9 +135,13 @@ static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *pp
struct iovec *iov; struct iovec *iov;
if (ppb->pages_in == ppb->pipe_size) { if (ppb->pages_in == ppb->pipe_size) {
unsigned long new_size = ppb->pipe_size << 1;
int ret; int ret;
ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (ppb->pipe_size * PAGE_SIZE) << 1); if (new_size > PIPE_MAX_SIZE)
return 1;
ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
if (ret < 0) if (ret < 0)
return 1; /* need to add another buf */ return 1; /* need to add another buf */
......
...@@ -317,7 +317,7 @@ static int dump_one_shmem(struct shmem_info_dump *si) ...@@ -317,7 +317,7 @@ static int dump_one_shmem(struct shmem_info_dump *si)
if (!iovs) if (!iovs)
goto err_unmap; goto err_unmap;
pp = create_page_pipe((nrpages + 1) / 2, iovs); pp = create_page_pipe((nrpages + 1) / 2, iovs, false);
if (!pp) if (!pp)
goto err_iovs; goto err_iovs;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment