Commit 213faeae authored by Pavel Emelyanov

mem: Introduce page server

The page server is a process that receives pages over
the network and puts them into pagemap- + pages- images. Right
now it simply gets the data and puts it into the
image files. When we have dirty set tracking in the kernel, the
page server will have to collect "page changes" and properly
integrate them into images.

Running crtools with page server is like this:

dst_node# crtools page-server --port <port> -D dump/ ...
src_node# crtools dump -t <pid> --page-server --address <dst_node> --port <port> -D dump/ ...

After this images from dst_node/dump/ and src_node/dump/ should
be put into one place and tasks can be restored out of it.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 02fc8695
......@@ -56,6 +56,7 @@
#include "cpu.h"
#include "elf.h"
#include "file-lock.h"
#include "page-xfer.h"
#include "asm/dump.h"
......@@ -1458,6 +1459,9 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
if (write_img_inventory())
goto err;
if (connect_to_page_server())
goto err;
if (collect_pstree(pid, opts))
goto err;
......
......@@ -12,6 +12,10 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "asm/types.h"
#include "compiler.h"
......@@ -22,6 +26,7 @@
#include "sk-inet.h"
#include "net.h"
#include "version.h"
#include "page-xfer.h"
struct cr_options opts;
......@@ -102,6 +107,9 @@ int main(int argc, char *argv[])
{ LREMAP_PARAM, no_argument, 0, 41},
{ "shell-job", no_argument, 0, 'j'},
{ "file-locks", no_argument, 0, 'l'},
{ "page-server", no_argument, 0, 50},
{ "address", required_argument, 0, 51},
{ "port", required_argument, 0, 52},
{ },
};
......@@ -220,6 +228,22 @@ int main(int argc, char *argv[])
list_add(&script->node, &opts.scripts);
}
break;
case 50:
opts.use_page_server = true;
break;
case 51:
if (!inet_aton(optarg, &opts.ps_addr.sin_addr)) {
pr_perror("Bad address");
return -1;
}
break;
case 52:
opts.ps_addr.sin_port = htons(atoi(optarg));
if (!opts.ps_addr.sin_port) {
pr_err("Bad port\n");
return -1;
}
break;
case 'j':
opts.shell_job = true;
break;
......@@ -258,6 +282,7 @@ int main(int argc, char *argv[])
strcmp(argv[optind], "restore") &&
strcmp(argv[optind], "show") &&
strcmp(argv[optind], "check") &&
strcmp(argv[optind], "page-server") &&
strcmp(argv[optind], "exec")) {
pr_err("Unknown command %s\n", argv[optind]);
goto usage;
......@@ -285,6 +310,9 @@ int main(int argc, char *argv[])
goto opt_pid_missing;
ret = cr_exec(pid, argv + optind + 1);
break;
case 'p':
ret = cr_page_server();
break;
default:
goto usage;
break;
......@@ -306,6 +334,7 @@ usage:
pr_msg(" show show dump file(s) contents\n");
pr_msg(" check checks whether the kernel support is up-to-date\n");
pr_msg(" exec execute a system call by other task\n");
pr_msg(" page-server launch page server\n");
if (argc < 2) {
pr_msg("\nTry -h|--help for more info\n");
......@@ -352,6 +381,11 @@ usage:
pr_msg(" -vvv same as -v 3\n");
pr_msg(" -vvvv same as -v 4\n");
pr_msg("\nPage server options\n");
pr_msg(" --page-server send pages to page server\n");
pr_msg(" --address [ADDR] address of page server\n");
pr_msg(" --port [PORT] port of page server\n");
pr_msg("\nShow options:\n");
pr_msg(" -f|--file show contents of a checkpoint file\n");
pr_msg(" -D|--images-dir directory where to get images from\n");
......
......@@ -109,6 +109,8 @@ struct cr_options {
char *pidfile;
struct list_head veth_pairs;
struct list_head scripts;
bool use_page_server;
struct sockaddr_in ps_addr;
};
extern struct cr_options opts;
......
#ifndef __CR_PAGE_XFER__H__
#define __CR_PAGE_XFER__H__
/*
 * Entry point of the "page-server" command: listens on opts.ps_addr,
 * accepts a single connection and serves page requests from it.
 * Returns 0 on success, -1 on error.
 */
int cr_page_server(void);
/*
 * Destination for dumped pages. The same interface is used for the
 * page server socket and (via open_page_xfer) for local image files;
 * the callbacks select the behaviour.
 */
struct page_xfer {
/* Emit one pagemap entry for @iov; the page data itself is taken from @pipe. */
int (*write_pagemap)(struct page_xfer *self, struct iovec *iov, int pipe);
/* Release whatever the open call acquired. */
void (*close)(struct page_xfer *self);
/* Pagemap entries are written here; for a server xfer this is the server socket. */
int fd;
union {
/* Local xfer: descriptor the page data is spliced into. */
int fd_pg;
/* Server xfer: encoded image type/id sent with every request. */
u64 dst_id;
};
};
/*
 * Set up @xfer for the image identified by @fd_type/@id. When
 * opts.use_page_server is set this hands over to open_page_server_xfer().
 * Returns 0 on success, -1 on error.
 */
int open_page_xfer(struct page_xfer *xfer, int fd_type, long id);
/* Set up @xfer so that pages go to the already connected page server. */
int open_page_server_xfer(struct page_xfer *, int fd_type, long id);
/*
 * Connect to the page server at opts.ps_addr. Does nothing and returns 0
 * when opts.use_page_server is not set; returns -1 on failure.
 */
int connect_to_page_server(void);
#endif
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include "crtools.h"
......@@ -6,6 +9,249 @@
#include "protobuf.h"
#include "protobuf/pagemap.pb-c.h"
/*
 * Request header exchanged between the dumping side and the page server.
 * It is written to and read from the socket as a raw struct, so the field
 * order and sizes are the wire format.
 */
struct page_server_iov {
/* Request code; PS_IOV_ADD is the only one the server handles. */
u32 cmd;
/* Number of pages whose data follows this header on the socket. */
u32 nr_pages;
/* Start address of the region, stored into the pagemap entry. */
u64 vaddr;
/* Target image, as produced by encode_pm_id(). */
u64 dst_id;
};
/* Request code: append a pagemap entry plus its page data. */
#define PS_IOV_ADD 1
/* The low PS_TYPE_BITS bits of a dst_id carry the image type, the rest the id. */
#define PS_TYPE_BITS 4
#define PS_TYPE_MASK ((1 << PS_TYPE_BITS) - 1)
static inline u64 encode_pm_id(int type, int id)
{
return ((u64)id) << PS_TYPE_BITS | type;
}
/* Extract the image type stored in the low PS_TYPE_BITS bits of @dst_id. */
static int decode_pm_type(u64 dst_id)
{
	u64 type_bits = dst_id & PS_TYPE_MASK;

	return type_bits;
}
/* Extract the image id stored above the type bits of @dst_id. */
static long decode_pm_id(u64 dst_id)
{
	u64 id_bits = dst_id >> PS_TYPE_BITS;

	return (long)id_bits;
}
/* State the page server keeps while serving one connection. */
struct page_xfer_job {
/* dst_id that loc_xfer is currently opened for. */
u64 dst_id;
/* Pipe used to move page data from the socket into the image. */
int p[2];
/* Capacity of the pipe as reported by F_GETPIPE_SZ; caps each splice. */
unsigned pipe_size;
/* Local xfer the received pagemap entries and pages are written to. */
struct page_xfer loc_xfer;
};
/* The single transfer job of the server; dst_id of ~0 means nothing is opened yet. */
static struct page_xfer_job cxfer = {
.dst_id = ~0,
};
/*
 * Handle one PS_IOV_ADD request.
 *
 * Switches the local xfer to the image named by pi->dst_id if it differs
 * from the one currently opened, writes a pagemap entry for the region and
 * then moves pi->nr_pages pages of data from the socket @sk into the pages
 * image, going through the xfer pipe in pipe-sized chunks.
 *
 * Returns 0 on success, -1 on any error (including the peer closing the
 * connection before all announced page data has arrived).
 */
static int page_server_add(int sk, struct page_server_iov *pi)
{
	size_t len;
	struct page_xfer *lxfer = &cxfer.loc_xfer;
	PagemapEntry pe = PAGEMAP_ENTRY__INIT;

	pr_debug("Adding %lx/%u\n", pi->vaddr, pi->nr_pages);

	if (cxfer.dst_id != pi->dst_id) {
		if (cxfer.dst_id != ~0)
			cxfer.loc_xfer.close(&cxfer.loc_xfer);

		if (open_page_xfer(&cxfer.loc_xfer,
					decode_pm_type(pi->dst_id),
					decode_pm_id(pi->dst_id))) {
			/* The old xfer is closed and no new one is open. */
			cxfer.dst_id = ~0;
			return -1;
		}

		cxfer.dst_id = pi->dst_id;
	}

	pe.vaddr = pi->vaddr;
	pe.nr_pages = pi->nr_pages;

	if (pb_write_one(lxfer->fd, &pe, PB_PAGEMAP) < 0)
		return -1;

	/* Widen before multiplying so a large page count cannot wrap. */
	len = (size_t)pe.nr_pages * PAGE_SIZE;
	while (len > 0) {
		ssize_t ret, chunk;
		size_t want = len;

		/* Never ask for more than the pipe can hold at once. */
		if (want > cxfer.pipe_size)
			want = cxfer.pipe_size;

		chunk = splice(sk, NULL, cxfer.p[1], NULL, want, SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
		if (chunk < 0) {
			pr_perror("Can't read from socket");
			return -1;
		}
		if (chunk == 0) {
			/* EOF in the middle of the data: without this the loop would spin forever. */
			pr_err("Socket closed with %lu bytes of pages still expected\n",
					(unsigned long)len);
			return -1;
		}

		ret = splice(cxfer.p[0], NULL, lxfer->fd_pg, NULL, chunk, SPLICE_F_MOVE);
		if (ret < 0) {
			pr_perror("Can't put pages into file");
			return -1;
		}
		if (ret != chunk) {
			/* A short splice does not set errno, so report without it. */
			pr_err("Partial image write %ld/%ld\n", ret, chunk);
			return -1;
		}

		len -= chunk;
	}

	return 0;
}
/*
 * Serve one page server session on the connected socket @sk.
 *
 * Creates the xfer pipe, then repeatedly reads a struct page_server_iov
 * header from the socket and dispatches on its command. The session ends
 * cleanly when the peer closes the connection between two requests.
 *
 * Returns 0 when the session ended cleanly, -1 on error. The xfer pipe is
 * closed before returning in either case.
 */
static int page_server_serve(int sk)
{
	int ret = -1;
	int pipe_size;

	if (pipe(cxfer.p)) {
		pr_perror("Can't make pipe for xfer");
		return -1;
	}

	pipe_size = fcntl(cxfer.p[0], F_GETPIPE_SZ, 0);
	if (pipe_size <= 0) {
		/* Storing -1 in the unsigned field would disable chunking. */
		pr_perror("Can't get xfer pipe size");
		goto out;
	}

	cxfer.pipe_size = pipe_size;
	pr_debug("Created xfer pipe size %u\n", cxfer.pipe_size);

	while (1) {
		struct page_server_iov pi;
		size_t got = 0;
		ssize_t rd = 0;
		int res;

		/*
		 * The socket is a byte stream, so a header may arrive in
		 * several pieces; keep reading until it is complete.
		 */
		while (got < sizeof(pi)) {
			rd = read(sk, (char *)&pi + got, sizeof(pi) - got);
			if (rd <= 0)
				break;
			got += rd;
		}

		if (rd < 0) {
			pr_perror("Can't read pagemap from socket");
			goto out;
		}

		/* EOF exactly on a request boundary: the peer is done. */
		if (got == 0)
			break;

		if (got != sizeof(pi)) {
			pr_err("Truncated request on socket (%u/%u bytes)\n",
					(unsigned)got, (unsigned)sizeof(pi));
			goto out;
		}

		switch (pi.cmd) {
		case PS_IOV_ADD:
			res = page_server_add(sk, &pi);
			break;
		default:
			pr_err("Unknown command %u\n", pi.cmd);
			res = -1;
			break;
		}

		if (res)
			goto out;
	}

	pr_info("Session over\n");
	ret = 0;
out:
	close(cxfer.p[0]);
	close(cxfer.p[1]);
	return ret;
}
/*
 * Run the page server: bind a TCP socket to opts.ps_addr, wait for a
 * single client and serve its requests until it disconnects.
 *
 * Returns the result of the session (0 on success) or -1 if the server
 * could not be set up. Both the listening and the accepted socket are
 * closed before returning.
 */
int cr_page_server(void)
{
	int sk, ask, ret;
	struct sockaddr_in caddr;
	socklen_t clen = sizeof(caddr);

	pr_info("Starting page server on port %u\n",
			(int)ntohs(opts.ps_addr.sin_port));

	sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (sk < 0) {
		pr_perror("Can't init page server\n");
		return -1;
	}

	opts.ps_addr.sin_family = AF_INET;
	if (bind(sk, (struct sockaddr *)&opts.ps_addr, sizeof(opts.ps_addr))) {
		pr_perror("Can't bind page server\n");
		goto err;
	}

	if (listen(sk, 1)) {
		pr_perror("Can't listen on page server socket");
		goto err;
	}

	ask = accept(sk, (struct sockaddr *)&caddr, &clen);
	if (ask < 0) {
		pr_perror("Can't accept connection to server");
		goto err;
	}

	/* Only one client is served, so stop listening right away. */
	close(sk);

	pr_info("Accepted connection from %s:%u\n",
			inet_ntoa(caddr.sin_addr),
			(int)ntohs(caddr.sin_port));

	ret = page_server_serve(ask);
	close(ask);
	return ret;

err:
	close(sk);
	return -1;
}
/* Socket connected to the page server, or -1 when there is no connection. */
static int page_server_sk = -1;

/*
 * Connect to the page server at opts.ps_addr and remember the socket in
 * page_server_sk for later open_page_server_xfer() calls.
 *
 * Does nothing and returns 0 when opts.use_page_server is not set.
 * Returns 0 on success, -1 on failure; on failure no socket is left open
 * and page_server_sk is -1 again.
 */
int connect_to_page_server(void)
{
	if (!opts.use_page_server)
		return 0;

	pr_info("Connecting to server %s:%u\n",
			inet_ntoa(opts.ps_addr.sin_addr),
			(int)ntohs(opts.ps_addr.sin_port));

	page_server_sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (page_server_sk < 0) {
		pr_perror("Can't create socket\n");
		page_server_sk = -1;
		return -1;
	}

	opts.ps_addr.sin_family = AF_INET;
	if (connect(page_server_sk, (struct sockaddr *)&opts.ps_addr,
				sizeof(opts.ps_addr)) < 0) {
		pr_perror("Can't connect to server\n");
		/* Don't leave an unconnected socket behind for later xfers. */
		close(page_server_sk);
		page_server_sk = -1;
		return -1;
	}

	return 0;
}
static int write_pagemap_to_server(struct page_xfer *xfer,
struct iovec *iov, int p)
{
struct page_server_iov pi;
pi.cmd = PS_IOV_ADD;
pi.dst_id = xfer->dst_id;
pi.vaddr = encode_pointer(iov->iov_base);
pi.nr_pages = iov->iov_len / PAGE_SIZE;
if (write(xfer->fd, &pi, sizeof(pi)) != sizeof(pi)) {
pr_perror("Can't write pagemap to server\n");
return -1;
}
pr_debug("Splicing %lu bytes / %u pages into socket\n", iov->iov_len, pi.nr_pages);
if (splice(p, NULL, xfer->fd, NULL, iov->iov_len,
SPLICE_F_MOVE) != iov->iov_len) {
pr_perror("Can't write pages to socket");
return -1;
}
return 0;
}
/*
 * close callback of a page server xfer. Only forgets the descriptor in
 * @xfer; the socket itself is not closed here.
 */
static void close_server_xfer(struct page_xfer *xfer)
{
xfer->fd = -1;
}
/*
 * Prepare @xfer for sending pages to the page server: it uses the socket
 * held in page_server_sk, the server-side callbacks, and a dst_id built
 * from @fd_type and @id. Always returns 0.
 */
int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id)
{
	/* Where the data goes. */
	xfer->dst_id = encode_pm_id(fd_type, id);
	xfer->fd = page_server_sk;

	/* How it gets there. */
	xfer->close = close_server_xfer;
	xfer->write_pagemap = write_pagemap_to_server;

	return 0;
}
static int write_pagemap_loc(struct page_xfer *xfer,
struct iovec *iov, int p)
{
......@@ -32,6 +278,9 @@ static void close_page_xfer(struct page_xfer *xfer)
int open_page_xfer(struct page_xfer *xfer, int fd_type, long id)
{
if (opts.use_page_server)
return open_page_server_xfer(xfer, fd_type, id);
xfer->fd = open_image(fd_type, O_DUMP, id);
if (xfer->fd < 0)
return -1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment