Commit 5087faa0 authored by Pavel Emelyanov

SCM: Dump and restore SCM_RIGHTs

Most of the pieces have already been described in the previous patches :)
so here's the summary.

* Dump:

When receiving a message, also receive any SCMs (this is already done), and
when an SCM_RIGHTS one is met -- go ahead and just dump the received
descriptors using the regular code, but taking current as the victim task.

Few words about file paths resolution -- since we do dump path-ed files
by receiving them from victim's parasite, such files sent via sockets
should still work OK, as we still receive them, just from another socket.

Several problems here:

1. Unix sockets sent via unix sockets form knots. Not supported.
2. Eventpolls sent via unix might themselves poll unix sockets. Knots
   again. Not supported either.

* Restore:

On restore we need to make unix socket wait for the soon-to-be-scm-sent
descriptors to get restored, so we need to find them, then put a dependency.
After that, the fake fdinfo entry is attached to the respective file
descs; once sent, the respective descriptors are closed.

https://github.com/xemul/criu/issues/251

v2: Addressed comments from Kirill

* Moved prepare_scms before adding fake fles (with comment)
* Add scm-only fles as fake, thus removing close_scm_fds
* Try hard finding any suitable fle to use as scm one when
  queuing them for unix socket scm list, only allocate a new
  one if really needed
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
parent e52341f5
...@@ -366,6 +366,14 @@ static int root_prepare_shared(void) ...@@ -366,6 +366,14 @@ static int root_prepare_shared(void)
if (ret) if (ret)
goto err; goto err;
/*
* This should be called with all packets collected AND all
* fdescs and fles prepared BUT post-prep-s not run.
*/
ret = prepare_scms();
if (ret)
goto err;
ret = run_post_prepare(); ret = run_post_prepare();
if (ret) if (ret)
goto err; goto err;
......
...@@ -37,6 +37,8 @@ extern int collect_sockets(struct ns_id *); ...@@ -37,6 +37,8 @@ extern int collect_sockets(struct ns_id *);
extern struct collect_image_info inet_sk_cinfo; extern struct collect_image_info inet_sk_cinfo;
extern struct collect_image_info unix_sk_cinfo; extern struct collect_image_info unix_sk_cinfo;
extern int fix_external_unix_sockets(void); extern int fix_external_unix_sockets(void);
extern int prepare_scms(void);
extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
extern struct collect_image_info netlink_sk_cinfo; extern struct collect_image_info netlink_sk_cinfo;
......
...@@ -18,9 +18,9 @@ ...@@ -18,9 +18,9 @@
#include "util.h" #include "util.h"
#include "util-pie.h" #include "util-pie.h"
#include "sockets.h" #include "sockets.h"
#include "xmalloc.h"
#include "sk-queue.h" #include "sk-queue.h"
#include "files.h"
#include "protobuf.h" #include "protobuf.h"
#include "images/sk-packet.pb-c.h" #include "images/sk-packet.pb-c.h"
...@@ -28,6 +28,8 @@ struct sk_packet { ...@@ -28,6 +28,8 @@ struct sk_packet {
struct list_head list; struct list_head list;
SkPacketEntry *entry; SkPacketEntry *entry;
char *data; char *data;
unsigned scm_len;
int *scm;
}; };
static LIST_HEAD(packets_list); static LIST_HEAD(packets_list);
...@@ -37,11 +39,21 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i ...@@ -37,11 +39,21 @@ static int collect_one_packet(void *obj, ProtobufCMessage *msg, struct cr_img *i
struct sk_packet *pkt = obj; struct sk_packet *pkt = obj;
pkt->entry = pb_msg(msg, SkPacketEntry); pkt->entry = pb_msg(msg, SkPacketEntry);
pkt->scm = NULL;
pkt->data = xmalloc(pkt->entry->length); pkt->data = xmalloc(pkt->entry->length);
if (pkt->data ==NULL) if (pkt->data ==NULL)
return -1; return -1;
/*
* See dump_packet_cmsg() -- only SCM_RIGHTS are supported and
* only 1 of that kind is possible, thus not more than 1 SCMs
* on a packet.
*/
if (pkt->entry->n_scm > 1) {
pr_err("More than 1 SCM is not possible\n");
return -1;
}
/* /*
* NOTE: packet must be added to the tail. Otherwise sequence * NOTE: packet must be added to the tail. Otherwise sequence
* will be broken. * will be broken.
...@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = { ...@@ -64,6 +76,50 @@ struct collect_image_info sk_queues_cinfo = {
.collect = collect_one_packet, .collect = collect_one_packet,
}; };
/*
 * Dump one SCM_RIGHTS control message found on a queued packet.
 *
 * Every descriptor carried by @ch is dumped with dump_my_file() and the
 * file IDs it reports are gathered into a freshly allocated ScmEntry,
 * which is then appended to @pe->scm.
 *
 * Returns 0 on success and -1 on error. On error the temporary entry is
 * freed and @pe is left untouched (n_scm still matches the scm array).
 */
static int dump_scm_rights(struct cmsghdr *ch, SkPacketEntry *pe)
{
	int nr_fds, *fds, i;
	void *buf;
	ScmEntry *scme;

	/* Payload of the cmsg is a plain array of ints (the descriptors) */
	nr_fds = (ch->cmsg_len - sizeof(*ch)) / sizeof(int);
	fds = (int *)CMSG_DATA(ch);

	/* A single allocation holds both the entry and its rights[] array */
	buf = xmalloc(sizeof(ScmEntry) + nr_fds * sizeof(uint32_t));
	if (!buf)
		return -1;

	/*
	 * scme is the pointer that release_cmsg() later hands to xfree(),
	 * so it is also the one to free on the error paths below.
	 */
	scme = xptr_pull(&buf, ScmEntry);
	scm_entry__init(scme);
	scme->type = SCM_RIGHTS;
	scme->n_rights = nr_fds;
	scme->rights = xptr_pull_s(&buf, nr_fds * sizeof(uint32_t));

	for (i = 0; i < nr_fds; i++) {
		int ftyp;

		if (dump_my_file(fds[i], &scme->rights[i], &ftyp))
			goto err;

		/*
		 * Unix sockets sent over unix sockets, or epolls sent
		 * over unix sockets (possibly polling this very socket),
		 * are tricky and not supported for now. (XXX -- todo)
		 */
		if (ftyp == FD_TYPES__UNIXSK || ftyp == FD_TYPES__EVENTPOLL) {
			pr_err("Can't dump send %d (unix/epoll) fd\n", ftyp);
			goto err;
		}
	}

	/*
	 * Grow the array first and bump the counter only once the slot
	 * really exists, so that n_scm never exceeds the array size.
	 */
	if (xrealloc_safe(&pe->scm, (pe->n_scm + 1) * sizeof(ScmEntry*)))
		goto err;

	pe->scm[pe->n_scm++] = scme;
	return 0;

err:
	xfree(scme);
	return -1;
}
/* /*
* Maximum size of the control messages. XXX -- is there any * Maximum size of the control messages. XXX -- is there any
* way to get this value out of the kernel? * way to get this value out of the kernel?
...@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = { ...@@ -73,8 +129,26 @@ struct collect_image_info sk_queues_cinfo = {
static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe) static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
{ {
struct cmsghdr *ch; struct cmsghdr *ch;
int n_rights = 0;
for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) { for (ch = CMSG_FIRSTHDR(mh); ch; ch = CMSG_NXTHDR(mh, ch)) {
if (ch->cmsg_type == SCM_RIGHTS) {
if (n_rights) {
/*
* Even if the user sends more than one cmsg with
* rights, the kernel merges them all together on recv.
*/
pr_err("Unexpected 2nd SCM_RIGHTS from the kernel\n");
return -1;
}
if (dump_scm_rights(ch, pe))
return -1;
n_rights++;
continue;
}
pr_err("Control messages in queue, not supported\n"); pr_err("Control messages in queue, not supported\n");
return -1; return -1;
} }
...@@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe) ...@@ -82,6 +156,18 @@ static int dump_packet_cmsg(struct msghdr *mh, SkPacketEntry *pe)
return 0; return 0;
} }
/*
 * Free every SCM entry attached to @pe, then the pointer array itself,
 * and reset the packet entry to the "no SCMs" state so it can be reused.
 */
static void release_cmsg(SkPacketEntry *pe)
{
	int idx = 0;

	while (idx < pe->n_scm) {
		xfree(pe->scm[idx]);
		idx++;
	}
	xfree(pe->scm);

	pe->scm = NULL;
	pe->n_scm = 0;
}
int dump_sk_queue(int sock_fd, int sock_id) int dump_sk_queue(int sock_fd, int sock_id)
{ {
SkPacketEntry pe = SK_PACKET_ENTRY__INIT; SkPacketEntry pe = SK_PACKET_ENTRY__INIT;
...@@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id) ...@@ -181,6 +267,9 @@ int dump_sk_queue(int sock_fd, int sock_id)
ret = -EIO; ret = -EIO;
goto err_set_sock; goto err_set_sock;
} }
if (pe.scm)
release_cmsg(&pe);
} }
ret = 0; ret = 0;
...@@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt) ...@@ -209,6 +298,11 @@ static int send_one_pkt(int fd, struct sk_packet *pkt)
iov.iov_base = pkt->data; iov.iov_base = pkt->data;
iov.iov_len = entry->length; iov.iov_len = entry->length;
if (pkt->scm != NULL) {
mh.msg_controllen = pkt->scm_len;
mh.msg_control = pkt->scm;
}
/* /*
* Don't try to use sendfile here, because it use sendpage() and * Don't try to use sendfile here, because it use sendpage() and
* all data are split on pages and a new skb is allocated for * all data are split on pages and a new skb is allocated for
...@@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id) ...@@ -264,3 +358,43 @@ int restore_sk_queue(int fd, unsigned int peer_id)
out: out:
return ret; return ret;
} }
/*
 * Walk all collected packets and, for each one that carries an SCM,
 * build the control-message buffer that send_one_pkt() will attach to
 * the message on restore.
 *
 * Only SCM_RIGHTS is understood. The descriptor numbers in the cmsg
 * payload are filled in by unix_note_scm_rights().
 *
 * Returns 0 on success, -1 on allocation failure, on an unsupported SCM
 * type, or when unix_note_scm_rights() fails.
 */
int prepare_scms(void)
{
	struct sk_packet *pkt;

	pr_info("Preparing SCMs\n");

	list_for_each_entry(pkt, &packets_list, list) {
		SkPacketEntry *entry = pkt->entry;
		ScmEntry *scm;
		struct cmsghdr *hdr;

		if (entry->n_scm == 0)
			continue;

		/* collect_one_packet() refuses packets with more than 1 SCM */
		scm = entry->scm[0];
		if (scm->type != SCM_RIGHTS) {
			pr_err("Unsupported scm %d in image\n", scm->type);
			return -1;
		}

		pkt->scm_len = CMSG_SPACE(scm->n_rights * sizeof(int));
		pkt->scm = xmalloc(pkt->scm_len);
		if (pkt->scm == NULL)
			return -1;

		/* FIXME -- via msghdr */
		hdr = (struct cmsghdr *)pkt->scm;
		hdr->cmsg_len = CMSG_LEN(scm->n_rights * sizeof(int));
		hdr->cmsg_level = SOL_SOCKET;
		hdr->cmsg_type = SCM_RIGHTS;

		if (unix_note_scm_rights(entry->id_for, scm->rights,
					(int *)CMSG_DATA(hdr), scm->n_rights))
			return -1;
	}

	return 0;
}
...@@ -795,6 +795,7 @@ struct unix_sk_info { ...@@ -795,6 +795,7 @@ struct unix_sk_info {
struct file_desc d; struct file_desc d;
struct list_head connected; /* List of sockets, connected to me */ struct list_head connected; /* List of sockets, connected to me */
struct list_head node; /* To link in peer's connected list */ struct list_head node; /* To link in peer's connected list */
struct list_head scm_fles;
/* /*
* For DGRAM sockets with queues, we should only restore the queue * For DGRAM sockets with queues, we should only restore the queue
...@@ -806,6 +807,11 @@ struct unix_sk_info { ...@@ -806,6 +807,11 @@ struct unix_sk_info {
u8 listen:1; u8 listen:1;
}; };
/*
 * One descriptor that a unix socket has to send as SCM_RIGHTS on restore.
 * Entries are queued on unix_sk_info->scm_fles by unix_note_scm_rights()
 * and dropped by chk_restored_scms() once the fle has reached FLE_OPEN.
 */
struct scm_fle {
struct list_head l; /* link in unix_sk_info->scm_fles */
struct fdinfo_list_entry *fle; /* fle of the file to be sent */
};
#define USK_PAIR_MASTER 0x1 #define USK_PAIR_MASTER 0x1
#define USK_PAIR_SLAVE 0x2 #define USK_PAIR_SLAVE 0x2
...@@ -821,6 +827,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino) ...@@ -821,6 +827,141 @@ static struct unix_sk_info *find_unix_sk_by_ino(int ino)
return NULL; return NULL;
} }
static struct unix_sk_info *find_queuer_for(int id)
{
struct unix_sk_info *ui;
list_for_each_entry(ui, &unix_sockets, list) {
if (ui->queuer == id)
return ui;
}
return NULL;
}
/*
 * Get an fdinfo list entry through which @owner can reach the file @tgt
 * in order to send it over a unix socket.
 *
 * If @owner already has @tgt in its fd table, that entry is returned.
 * Otherwise a new entry on an unused fd number is registered for @owner
 * via collect_fd_to().
 *
 * Returns the entry, or NULL on failure.
 */
static struct fdinfo_list_entry *get_fle_for_scm(struct file_desc *tgt,
		struct pstree_item *owner)
{
	struct fdinfo_list_entry *cur;
	FdinfoEntry *tmpl = NULL;
	FdinfoEntry *fe;
	int new_fd;

	list_for_each_entry(cur, &tgt->fd_info_head, desc_list) {
		/* Owner already has this file in its fdtable -- reuse it */
		if (cur->task == owner)
			return cur;

		/* Remember any other user's entry as a template */
		tmpl = cur->fe;
	}

	/*
	 * Some other task restores this file (or nobody does). Pretend
	 * that the owner is one more user of it.
	 */
	new_fd = find_unused_fd(owner, -1);
	pr_info("`- will add SCM-only %d fd\n", new_fd);

	if (tmpl == NULL) {
		/*
		 * Nobody holds the file: it was sent over the socket and
		 * then closed. There is nothing to copy from, so build a
		 * brand new entry out of the file descriptor itself.
		 */
		fe = xmalloc(sizeof(*fe));
		if (!fe)
			return NULL;

		fdinfo_entry__init(fe);
		fe->id = tgt->id;
		fe->type = tgt->ops->type;
		fe->fd = new_fd;
		fe->flags = 0;
	} else {
		fe = dup_fdinfo(tmpl, new_fd, 0);
		if (!fe) {
			pr_err("Can't duplicate fdinfo for scm\n");
			return NULL;
		}
	}

	/*
	 * Make this fle fake, so that files collecting engine
	 * closes them at the end.
	 */
	return collect_fd_to(vpid(owner), fe, rsti(owner), tgt, true);
}
/*
 * Register the files that have to be sent as SCM_RIGHTS to socket
 * @id_for and fill in the descriptor numbers to send.
 *
 * @id_for:   id of the socket that receives the rights
 * @file_ids: IDs of the files to send (n_ids elements)
 * @fds:      output array (n_ids elements); receives the fd numbers, in
 *            the sending task, through which the files will be sent
 * @n_ids:    number of files
 *
 * For every file an scm_fle is queued on the sending socket's scm_fles
 * list; chk_restored_scms() checks that list to see whether the files
 * are already opened.
 *
 * Returns 0 on success, -1 on error.
 */
int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids)
{
	struct unix_sk_info *ui;
	struct pstree_item *owner;
	int i;

	ui = find_queuer_for(id_for);
	if (!ui) {
		pr_err("Can't find sender for %d\n", id_for);
		return -1;
	}

	pr_info("Found queuer for %d -> %d\n", id_for, ui->ue->id);
	/*
	 * This is the task that will restore this socket
	 */
	owner = file_master(&ui->d)->task;

	pr_info("-> will set up deps\n");
	/*
	 * The ui will send data to the rights receiver. Add a fake fle
	 * for the file and a dependency.
	 */
	for (i = 0; i < n_ids; i++) {
		struct file_desc *tgt;
		struct scm_fle *sfle;

		tgt = find_file_desc_raw(FD_TYPES__UND, file_ids[i]);
		if (!tgt) {
			pr_err("Can't find fdesc to send\n");
			return -1;
		}

		pr_info("scm: add file %d -> %d\n", tgt->id, vpid(owner));
		sfle = xmalloc(sizeof(*sfle));
		if (!sfle)
			return -1;

		sfle->fle = get_fle_for_scm(tgt, owner);
		if (!sfle->fle) {
			pr_err("Can't request new fle for scm\n");
			/* Not linked into any list yet -- don't leak it */
			xfree(sfle);
			return -1;
		}

		list_add_tail(&sfle->l, &ui->scm_fles);
		fds[i] = sfle->fle->fe->fd;
	}

	return 0;
}
/*
 * Check whether all files that @ui has to send via SCM_RIGHTS have
 * been opened.
 *
 * Returns 1 as soon as an entry whose fle has not reached FLE_OPEN is
 * met, and 0 when there are no such entries left.
 */
static int chk_restored_scms(struct unix_sk_info *ui)
{
	struct scm_fle *cur, *next;

	list_for_each_entry_safe(cur, next, &ui->scm_fles, l) {
		if (cur->fle->stage >= FLE_OPEN) {
			/*
			 * This one is ready. Drop it from the list so the
			 * next pass does not have to look at it again.
			 */
			list_del(&cur->l);
			xfree(cur);
			continue;
		}

		return 1;
	}

	return 0;
}
static int wake_connected_sockets(struct unix_sk_info *ui) static int wake_connected_sockets(struct unix_sk_info *ui)
{ {
struct fdinfo_list_entry *fle; struct fdinfo_list_entry *fle;
...@@ -1306,12 +1447,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd) ...@@ -1306,12 +1447,18 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
struct unix_sk_info *ui; struct unix_sk_info *ui;
int ret; int ret;
ui = container_of(d, struct unix_sk_info, d);
/* FIXME -- only queue restore may be postponed */
if (chk_restored_scms(ui)) {
pr_info("scm: Wait for tgt to restore\n");
return 1;
}
fle = file_master(d); fle = file_master(d);
if (fle->stage >= FLE_OPEN) if (fle->stage >= FLE_OPEN)
return post_open_unix_sk(d, fle->fe->fd); return post_open_unix_sk(d, fle->fe->fd);
ui = container_of(d, struct unix_sk_info, d);
if (inherited_fd(d, new_fd)) { if (inherited_fd(d, new_fd)) {
ui->ue->uflags |= USK_INHERIT; ui->ue->uflags |= USK_INHERIT;
ret = *new_fd >= 0 ? 0 : -1; ret = *new_fd >= 0 ? 0 : -1;
...@@ -1410,6 +1557,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) ...@@ -1410,6 +1557,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
ui->listen = 0; ui->listen = 0;
INIT_LIST_HEAD(&ui->connected); INIT_LIST_HEAD(&ui->connected);
INIT_LIST_HEAD(&ui->node); INIT_LIST_HEAD(&ui->node);
INIT_LIST_HEAD(&ui->scm_fles);
ui->flags = 0; ui->flags = 0;
uname = ui->name; uname = ui->name;
......
syntax = "proto2"; syntax = "proto2";
// One control message (SCM) attached to a queued socket packet.
message scm_entry {
// Control message type; the dump code only ever writes SCM_RIGHTS here.
required uint32 type = 1;
// For SCM_RIGHTS: IDs of the dumped files, one per descriptor sent.
repeated uint32 rights = 2;
}
message sk_packet_entry { message sk_packet_entry {
required uint32 id_for = 1; required uint32 id_for = 1;
required uint32 length = 2; required uint32 length = 2;
repeated scm_entry scm = 4;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment