Commit 019ebec0, authored by Andrey Vagin, committed by Andrei Vagin

unix: restore sockets on correct mount points

Currently we restore all sockets in the root mount namespace, because we
were not able to get any information about a mount point where a socket
is bound. It is obviously incorrect in some cases.

In 4.10 kernel, we added the SIOCUNIXFILE ioctl for unix sockets.  This
ioctl opens a file to which a socket is bound and returns a file
descriptor.

This new ioctl allows us to get mnt_id by reading fdinfo, and mnt_id
is enough to find a proper mount point and a mount namespace.

The logic of this patch is straightforward. On dump, we save mnt_id for
sockets; on restore, we find a mount namespace by mnt_id and restore the
socket in its mount namespace.
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
parent 6d785e6c
...@@ -224,6 +224,27 @@ int kerndat_socket_unix_file(void) ...@@ -224,6 +224,27 @@ int kerndat_socket_unix_file(void)
return 0; return 0;
} }
/*
 * Look up the mount ID of the file a unix socket is bound to.
 *
 * The SIOCUNIXFILE ioctl is issued on @lfd to obtain a descriptor for
 * the file the socket is bound to; that descriptor's fdinfo is then
 * parsed and its mnt_id is stored in @mnt_id.
 *
 * @lfd:    descriptor of the unix socket being inspected
 * @mnt_id: output; written only on success
 *
 * Returns 0 on success, -1 if the ioctl or the fdinfo parsing fails.
 */
static int get_mnt_id(int lfd, int *mnt_id)
{
	/* -1 stays in place if the parser does not fill mnt_id in. */
	struct fdinfo_common fdinfo = { .mnt_id = -1 };
	int ret, fd;

	fd = ioctl(lfd, SIOCUNIXFILE);
	if (fd < 0) {
		pr_perror("Unable to get a socket file descriptor");
		return -1;
	}

	ret = parse_fdinfo(fd, FD_TYPES__UND, &fdinfo);
	/* The descriptor was needed only for its fdinfo; drop it either way. */
	close(fd);
	if (ret < 0)
		return -1;

	*mnt_id = fdinfo.mnt_id;

	return 0;
}
static int resolve_rel_name(u32 id, struct unix_sk_desc *sk, const struct fd_parms *p, char **pdir) static int resolve_rel_name(u32 id, struct unix_sk_desc *sk, const struct fd_parms *p, char **pdir)
{ {
const char *dirs[] = { "cwd", "root" }; const char *dirs[] = { "cwd", "root" };
...@@ -294,7 +315,7 @@ err: ...@@ -294,7 +315,7 @@ err:
return -ENOENT; return -ENOENT;
} }
static int unix_resolve_name(u32 id, struct unix_sk_desc *d, static int unix_resolve_name(int lfd, u32 id, struct unix_sk_desc *d,
UnixSkEntry *ue, const struct fd_parms *p); UnixSkEntry *ue, const struct fd_parms *p);
static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p) static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
{ {
...@@ -348,7 +369,7 @@ static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p) ...@@ -348,7 +369,7 @@ static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
ue->opts = skopts; ue->opts = skopts;
ue->uflags = 0; ue->uflags = 0;
if (unix_resolve_name(id, sk, ue, p)) if (unix_resolve_name(lfd, id, sk, ue, p))
goto err; goto err;
/* /*
...@@ -524,7 +545,7 @@ const struct fdtype_ops unix_dump_ops = { ...@@ -524,7 +545,7 @@ const struct fdtype_ops unix_dump_ops = {
.dump = dump_one_unix_fd, .dump = dump_one_unix_fd,
}; };
static int unix_resolve_name(u32 id, struct unix_sk_desc *d, static int unix_resolve_name(int lfd, u32 id, struct unix_sk_desc *d,
UnixSkEntry *ue, const struct fd_parms *p) UnixSkEntry *ue, const struct fd_parms *p)
{ {
char *name = d->name; char *name = d->name;
...@@ -533,11 +554,21 @@ static int unix_resolve_name(u32 id, struct unix_sk_desc *d, ...@@ -533,11 +554,21 @@ static int unix_resolve_name(u32 id, struct unix_sk_desc *d,
struct ns_id *ns; struct ns_id *ns;
struct stat st; struct stat st;
int mntns_root; int mntns_root;
int ret; int ret, mnt_id;
if (d->namelen == 0 || name[0] == '\0') if (d->namelen == 0 || name[0] == '\0')
return 0; return 0;
if (kdat.sk_unix_file && (root_ns_mask & CLONE_NEWNS)) {
if (get_mnt_id(lfd, &mnt_id))
return -1;
ue->mnt_id = mnt_id;
ue->has_mnt_id = mnt_id;
}
if (ue->mnt_id >= 0)
ns = lookup_nsid_by_mnt_id(ue->mnt_id);
else
ns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc); ns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc);
if (!ns) { if (!ns) {
ret = -ENOENT; ret = -ENOENT;
...@@ -1083,12 +1114,17 @@ static int restore_sk_common(int fd, struct unix_sk_info *ui) ...@@ -1083,12 +1114,17 @@ static int restore_sk_common(int fd, struct unix_sk_info *ui)
return 0; return 0;
} }
static void revert_unix_sk_cwd(int *prev_cwd_fd, int *root_fd) static int revert_unix_sk_cwd(int *prev_cwd_fd, int *root_fd, int *ns_fd)
{ {
int ret = 0;
if (*ns_fd >= 0 && restore_ns(*ns_fd, &mnt_ns_desc))
ret = -1;
if (*root_fd >= 0) { if (*root_fd >= 0) {
if (fchdir(*root_fd) || chroot(".")) if (fchdir(*root_fd) || chroot("."))
pr_perror("Can't revert root directory"); pr_perror("Can't revert root directory");
close_safe(root_fd); close_safe(root_fd);
ret = -1;
} }
if (prev_cwd_fd && *prev_cwd_fd >= 0) { if (prev_cwd_fd && *prev_cwd_fd >= 0) {
if (fchdir(*prev_cwd_fd)) if (fchdir(*prev_cwd_fd))
...@@ -1097,14 +1133,38 @@ static void revert_unix_sk_cwd(int *prev_cwd_fd, int *root_fd) ...@@ -1097,14 +1133,38 @@ static void revert_unix_sk_cwd(int *prev_cwd_fd, int *root_fd)
pr_debug("Reverted working dir\n"); pr_debug("Reverted working dir\n");
close(*prev_cwd_fd); close(*prev_cwd_fd);
*prev_cwd_fd = -1; *prev_cwd_fd = -1;
ret = -1;
} }
return ret;
} }
static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, int *prev_root_fd) static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
int *prev_root_fd, int *prev_mntns_fd)
{ {
static struct ns_id *root = NULL; static struct ns_id *root = NULL, *ns;
int fd; int fd;
if (prev_mntns_fd && ui->name[0] && ui->ue->mnt_id >= 0) {
struct ns_id *mntns = lookup_nsid_by_mnt_id(ui->ue->mnt_id);
int ns_fd;
if (mntns == NULL) {
pr_err("Unable to find the %d mount\n", ui->ue->mnt_id);
return -1;
}
ns_fd = fdstore_get(mntns->mnt.nsfd_id);
if (ns_fd < 0)
return -1;
if (switch_ns_by_fd(ns_fd, &mnt_ns_desc, prev_mntns_fd))
return -1;
set_proc_self_fd(-1);
close(ns_fd);
}
*prev_cwd_fd = open(".", O_RDONLY); *prev_cwd_fd = open(".", O_RDONLY);
if (*prev_cwd_fd < 0) { if (*prev_cwd_fd < 0) {
pr_perror("Can't open current dir"); pr_perror("Can't open current dir");
...@@ -1112,15 +1172,23 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, int *prev ...@@ -1112,15 +1172,23 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, int *prev
} }
if (prev_root_fd && (root_ns_mask & CLONE_NEWNS)) { if (prev_root_fd && (root_ns_mask & CLONE_NEWNS)) {
if (ui->ue->mnt_id >= 0) {
ns = lookup_nsid_by_mnt_id(ui->ue->mnt_id);
if (ns == NULL)
goto err;
} else {
if (root == NULL) if (root == NULL)
root = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc); root = lookup_ns_by_id(root_item->ids->mnt_ns_id,
&mnt_ns_desc);
ns = root;
}
*prev_root_fd = open("/", O_RDONLY); *prev_root_fd = open("/", O_RDONLY);
if (*prev_root_fd < 0) { if (*prev_root_fd < 0) {
pr_perror("Can't open current root"); pr_perror("Can't open current root");
goto err; goto err;
} }
fd = fdstore_get(root->mnt.root_fd_id); fd = fdstore_get(ns->mnt.root_fd_id);
if (fd < 0) { if (fd < 0) {
pr_err("Can't get root fd\n"); pr_err("Can't get root fd\n");
goto err; goto err;
...@@ -1159,7 +1227,7 @@ static int post_open_standalone(struct file_desc *d, int fd) ...@@ -1159,7 +1227,7 @@ static int post_open_standalone(struct file_desc *d, int fd)
struct unix_sk_info *ui; struct unix_sk_info *ui;
struct unix_sk_info *peer; struct unix_sk_info *peer;
struct sockaddr_un addr; struct sockaddr_un addr;
int cwd_fd = -1, root_fd = -1; int cwd_fd = -1, root_fd = -1, ns_fd = -1;
ui = container_of(d, struct unix_sk_info, d); ui = container_of(d, struct unix_sk_info, d);
BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) || BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) ||
...@@ -1188,19 +1256,19 @@ static int post_open_standalone(struct file_desc *d, int fd) ...@@ -1188,19 +1256,19 @@ static int post_open_standalone(struct file_desc *d, int fd)
pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino); pr_info("\tConnect %#x to %#x\n", ui->ue->ino, peer->ue->ino);
if (prep_unix_sk_cwd(peer, &cwd_fd, NULL)) if (prep_unix_sk_cwd(peer, &cwd_fd, NULL, &ns_fd))
return -1; return -1;
if (connect(fd, (struct sockaddr *)&addr, if (connect(fd, (struct sockaddr *)&addr,
sizeof(addr.sun_family) + sizeof(addr.sun_family) +
peer->ue->name.len) < 0) { peer->ue->name.len) < 0) {
pr_perror("Can't connect %#x socket", ui->ue->ino); pr_perror("Can't connect %#x socket", ui->ue->ino);
revert_unix_sk_cwd(&cwd_fd, &root_fd); revert_unix_sk_cwd(&cwd_fd, &root_fd, &ns_fd);
return -1; return -1;
} }
ui->is_connected = true; ui->is_connected = true;
revert_unix_sk_cwd(&cwd_fd, &root_fd); revert_unix_sk_cwd(&cwd_fd, &root_fd, &ns_fd);
restore_queue: restore_queue:
if (peer->queuer == ui && if (peer->queuer == ui &&
...@@ -1216,7 +1284,7 @@ restore_sk_common: ...@@ -1216,7 +1284,7 @@ restore_sk_common:
static int bind_unix_sk(int sk, struct unix_sk_info *ui) static int bind_unix_sk(int sk, struct unix_sk_info *ui)
{ {
struct sockaddr_un addr; struct sockaddr_un addr;
int cwd_fd = -1, root_fd = -1; int cwd_fd = -1, root_fd = -1, ns_fd = -1;
int ret = -1; int ret = -1;
if (ui->ue->name.len == 0) if (ui->ue->name.len == 0)
...@@ -1238,10 +1306,10 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) ...@@ -1238,10 +1306,10 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
addr.sun_family = AF_UNIX; addr.sun_family = AF_UNIX;
memcpy(&addr.sun_path, ui->name, ui->ue->name.len); memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
if (prep_unix_sk_cwd(ui, &cwd_fd, NULL)) if (ui->ue->name.len) {
if (ui->name[0] && prep_unix_sk_cwd(ui, &cwd_fd, NULL, &ns_fd))
return -1; return -1;
if (ui->ue->name.len) {
ret = bind(sk, (struct sockaddr *)&addr, ret = bind(sk, (struct sockaddr *)&addr,
sizeof(addr.sun_family) + ui->ue->name.len); sizeof(addr.sun_family) + ui->ue->name.len);
if (ret < 0) { if (ret < 0) {
...@@ -1324,7 +1392,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) ...@@ -1324,7 +1392,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
ret = 0; ret = 0;
done: done:
revert_unix_sk_cwd(&cwd_fd, &root_fd); revert_unix_sk_cwd(&cwd_fd, &root_fd, &ns_fd);
return ret; return ret;
} }
...@@ -1651,12 +1719,12 @@ static struct file_desc_ops unix_desc_ops = { ...@@ -1651,12 +1719,12 @@ static struct file_desc_ops unix_desc_ops = {
*/ */
static void unlink_stale(struct unix_sk_info *ui) static void unlink_stale(struct unix_sk_info *ui)
{ {
int ret, cwd_fd = -1, root_fd = -1; int ret, cwd_fd = -1, root_fd = -1, ns_fd = -1;
if (ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN)) if (ui->name[0] == '\0' || (ui->ue->uflags & USK_EXTERN))
return; return;
if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd)) if (prep_unix_sk_cwd(ui, &cwd_fd, &root_fd, NULL))
return; return;
ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0; ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0;
...@@ -1666,7 +1734,7 @@ static void unlink_stale(struct unix_sk_info *ui) ...@@ -1666,7 +1734,7 @@ static void unlink_stale(struct unix_sk_info *ui)
ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
ui->name_dir ? ui->name_dir : "-"); ui->name_dir ? ui->name_dir : "-");
} }
revert_unix_sk_cwd(&cwd_fd, &root_fd); revert_unix_sk_cwd(&cwd_fd, &root_fd, &ns_fd);
} }
static void try_resolve_unix_peer(struct unix_sk_info *ui); static void try_resolve_unix_peer(struct unix_sk_info *ui);
......
...@@ -50,4 +50,5 @@ message unix_sk_entry { ...@@ -50,4 +50,5 @@ message unix_sk_entry {
optional bool deleted = 15; optional bool deleted = 15;
optional uint32 ns_id = 16; optional uint32 ns_id = 16;
optional sint32 mnt_id = 17 [default = -1];
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment