Commit c5eb61e8 authored by Pavel Emelyanov's avatar Pavel Emelyanov Committed by Cyrill Gorcunov

Unix sockets initial support

Currently it can only work with stream sockets, which have no skbs in queues
(listening or established -- both work OK).

The checkpoint (cpt) part uses the sock_diag engine, which was recently merged
into Dave's tree, to collect sockets. It then dumps a socket whenever a
descriptor fails to open through /proc/pid/fd (sockets do not allow opening
through proc) and the filesystem ID of that descriptor matches SOCKFS_TYPE.

The rst part is more tricky. Listen sockets are just restored, this is simple.
Connected sockets are restored like this:

1. One end establishes a listening anon socket at the desired descriptor;
2. The other end just creates a socket at the desired descriptor;
3. All sockets, that are to be connect()-ed call connect. Unix sockets
   do not block connect() till the accept() time and thus we continue with...
4. ... all listening sockets call accept() and ... dup2 the new fd into the
   accepting end.

There's a problem with this approach -- socket names are not preserved, but
looking into our OpenVZ implementation I think this is OK for existing apps.

What should be done next is:

1. Need to merge the file IDs patches in our tree and make Andrey to
   support files sharing. This will solve the

	sk = socket();
	fork();

   case. Currently it simply doesn't work :(

2. Need to add support for DGRAM sockets -- I wrote comment how to do it
   in the can_dump_unix_sk()

3. Need to add support for in-flight connections

4. Implement support for UDP sockets (quite simple)

5. Implement support for listening TCP sockets (also not very complex)

6. Implement support for connected TCP sockets (hard one, Tejun's patches are not
   very good for this from my POV)

Cyrill, plz, apply this patch and put the above descriptions onto wiki docs (do we
have the plans page yet?).

Andrey, plz, take care of unix sockets tests in zdtm. Most likely it won't work till
you do the shared files support for sockets.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
parent bf7a74d4
......@@ -67,6 +67,8 @@ OBJS += util.o
OBJS += ptrace.o
OBJS += restorer.o
OBJS += log.o
OBJS += libnetlink.o
OBJS += sockets.o
DEPS := $(patsubst %.o,%.d,$(OBJS))
......
......@@ -25,6 +25,7 @@
#include "syscall.h"
#include "ptrace.h"
#include "util.h"
#include "sockets.h"
#include "image.h"
......@@ -239,6 +240,10 @@ static int dump_one_fd(char *pid_fd_dir, int dir, char *fd_name, unsigned long p
fd = openat(dir, fd_name, O_RDONLY);
if (fd < 0) {
err = __try_dump_socket(pid_fd_dir, fd_name, cr_fdset);
if (err != 1)
return err;
pr_perror("Failed to openat %s/%d %s\n", pid_fd_dir, dir, fd_name);
return -1;
}
......@@ -1179,6 +1184,9 @@ int cr_dump_tasks(pid_t pid, struct cr_options *opts)
if (collect_pstree(pid, &pstree_list))
goto err;
if (collect_sockets())
goto err;
/*
* Since ptrace-seize doesn't work on frozen tasks
* we stick with explicit tasks stopping via stop
......
......@@ -30,6 +30,7 @@
#include "log.h"
#include "syscall.h"
#include "restorer.h"
#include "sockets.h"
#include "crtools.h"
......@@ -1251,6 +1252,9 @@ static int restore_one_task(int pid)
if (prepare_pipes(pid))
return -1;
if (prepare_sockets(pid))
return -1;
if (prepare_fds(pid))
return -1;
......
......@@ -17,7 +17,7 @@
#include "compiler.h"
#include "crtools.h"
#include "util.h"
#include "sockets.h"
#include "image.h"
#define DEF_PAGES_PER_LINE 6
......@@ -449,6 +449,9 @@ static int cr_parse_file(struct cr_options *opts)
case SIGACT_MAGIC:
show_sigacts(opts->show_dump_file, fd, true);
break;
case UNIXSK_MAGIC:
show_unixsk(opts->show_dump_file, fd, true);
break;
default:
pr_err("Unknown magic %x on %s\n", opts->show_dump_file);
goto err;
......@@ -617,6 +620,9 @@ static int cr_show_all(unsigned long pid, struct cr_options *opts)
show_sigacts(cr_fdset->desc[CR_FD_SIGACT].path,
cr_fdset->desc[CR_FD_SIGACT].fd, true);
show_unixsk(cr_fdset->desc[CR_FD_UNIXSK].path,
cr_fdset->desc[CR_FD_UNIXSK].fd, true);
close_cr_fdset(cr_fdset);
free_cr_fdset(&cr_fdset);
......
......@@ -17,6 +17,7 @@
#include "crtools.h"
#include "util.h"
#include "log.h"
#include "sockets.h"
static struct cr_options opts;
struct page_entry zero_page_entry;
......@@ -78,6 +79,12 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
.fmt = FMT_FNAME_SIGACTS,
.magic = SIGACT_MAGIC,
},
/* info about unix sockets */
[CR_FD_UNIXSK] = {
.fmt = FMT_FNAME_UNIXSK,
.magic = UNIXSK_MAGIC,
},
};
struct cr_fdset *alloc_cr_fdset(pid_t pid)
......
......@@ -23,6 +23,7 @@ enum {
CR_FD_PSTREE,
CR_FD_SHMEM,
CR_FD_SIGACT,
CR_FD_UNIXSK,
CR_FD_MAX
};
......@@ -57,6 +58,7 @@ struct cr_fd_desc_tmpl {
#define FMT_FNAME_SHMEM "shmem-%d.img"
#define FMT_FNAME_VMAS "vmas-%d.img"
#define FMT_FNAME_SIGACTS "sigacts-%d.img"
#define FMT_FNAME_UNIXSK "unixsk-%d.img"
extern int get_image_path(char *path, int size, const char *fmt, int pid);
......
......@@ -12,6 +12,7 @@
#define PSTREE_MAGIC 0x40044004
#define PIPES_MAGIC 0x05055050
#define SIGACT_MAGIC 0x60606060
#define UNIXSK_MAGIC 0x07070707
#define FDINFO_FD 1
#define FDINFO_MAP 2
......@@ -52,6 +53,18 @@ struct pipe_entry {
u8 data[0];
} __packed;
/*
 * One record of the unix sockets image. The fixed part below is
 * written first and is followed by namelen bytes of socket name.
 */
struct unix_sk_entry {
/* descriptor number the socket occupies in the task */
u32 fd;
/* socket inode number, used as the socket identity */
u32 id;
/* socket type as reported by the diag message */
u8 type;
/* socket state as reported by the diag message */
u8 state;
u8 namelen; /* fits UNIX_PATH_MAX */
/* written as zero by the dumper */
u8 pad;
/* backlog passed to listen() on restore */
u32 backlog;
/* inode number of the peer socket, zero if none was reported */
u32 peer;
/* namelen bytes of name follow the fixed part */
u8 name[0];
} __packed;
struct vma_entry {
u64 start;
u64 end;
......
#ifndef __LINUX_RTNETLINK_H
#define __LINUX_RTNETLINK_H
/*
 * Header of a single netlink attribute. The payload follows the
 * aligned header (see RTA_DATA).
 */
struct rtattr {
/* attribute length; RTA_PAYLOAD subtracts the aligned header from it */
unsigned short rta_len;
/* attribute type; parse_rtattr() uses it as the index into its table */
unsigned short rta_type;
};
/* Macros to handle rtattributes */
/* Attributes are aligned to RTA_ALIGNTO bytes */
#define RTA_ALIGNTO 4
#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
/* True when a whole attribute header and its declared length fit into len */
#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
(rta)->rta_len >= sizeof(struct rtattr) && \
(rta)->rta_len <= (len))
/* Step to the next attribute, decreasing attrlen by the aligned length */
#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
(struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
/* Payload pointer and payload size of an attribute */
#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
/*
 * Macros to handle netlink messages.
 * NOTE(review): NLMSG_ALIGNTO is not defined in this header --
 * presumably it comes from <linux/netlink.h>; confirm.
 */
#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
/* Payload pointer of a message */
#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
/* Step to the next message, decreasing len by the aligned message length */
#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
(struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
/* True when a whole message header and its declared length fit into len */
#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
(nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
(nlh)->nlmsg_len <= (len))
#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
/* Control message types */
#define NLMSG_NOOP 0x1 /* Nothing. */
#define NLMSG_ERROR 0x2 /* Error */
#define NLMSG_DONE 0x3 /* End of a dump */
#define NLMSG_OVERRUN 0x4 /* Data lost */
#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
/*
 * Fill tb[0..max] with pointers to the first attribute of each type
 * found in the len bytes starting at rta. Always returns 0.
 */
int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len);
/*
 * Walk the netlink messages in buf and call cb for each data message.
 * Returns 0 when the end-of-dump message is met, -1 on error and 1 when
 * the buffer is exhausted without seeing the end of the dump.
 */
int nlmsg_receive(char *buf, int len, int (*cb)(struct nlmsghdr *));
#endif
#ifndef __CRTOOLS_SOCKETS_H__
#define __CRTOOLS_SOCKETS_H__
/* Collect the sockets via the sock diag netlink socket; 0 on success, -1 on error */
int collect_sockets(void);
struct cr_fdset;
/*
 * Try to dump the descriptor dir_name/fd_name as a socket.
 * Returns 0 when dumped, 1 when the descriptor is not a socket
 * and -1 on error.
 */
int __try_dump_socket(char *dir_name, char *fd_name, struct cr_fdset *cr_fdset);
/* Restore the sockets saved in the image of the given pid */
int prepare_sockets(int pid);
/* Print the entries of a unix sockets image read from fd */
void show_unixsk(char *name, int fd, bool show_header);
#endif
#ifndef __UNIX_DIAG_H__
#define __UNIX_DIAG_H__
/* Request sent over the sock diag netlink socket to list unix sockets */
struct unix_diag_req {
/* set to AF_UNIX by the collector */
__u8 sdiag_family;
__u8 sdiag_protocol;
__u16 pad;
/* states selector; the collector passes -1 meaning "all" */
__u32 udiag_states;
__u32 udiag_ino;
/* mask of UDIAG_SHOW_* bits selecting the attributes to report */
__u32 udiag_show;
__u32 udiag_cookie[2];
};
#define UDIAG_SHOW_NAME 0x00000001 /* show name (not path) */
#define UDIAG_SHOW_VFS 0x00000002 /* show VFS inode info */
#define UDIAG_SHOW_PEER 0x00000004 /* show peer socket info */
#define UDIAG_SHOW_ICONS 0x00000008 /* show pending connections */
#define UDIAG_SHOW_RQLEN 0x00000010 /* show skb receive queue len */
/*
 * Fixed part of a reply describing one unix socket. The attributes
 * (struct rtattr) are parsed starting right after this structure.
 */
struct unix_diag_msg {
__u8 udiag_family;
/* socket type, stored into unix_sk_desc.type by the collector */
__u8 udiag_type;
/* socket state, stored into unix_sk_desc.state by the collector */
__u8 udiag_state;
__u8 pad;
/* socket inode number, used as the hash key by the collector */
__u32 udiag_ino;
__u32 udiag_cookie[2];
};
/*
 * Attribute types of a unix diag reply; they index the table
 * filled by parse_rtattr(). UNIX_DIAG_MAX is passed as its max.
 */
enum {
UNIX_DIAG_NAME,
UNIX_DIAG_VFS,
UNIX_DIAG_PEER,
UNIX_DIAG_ICONS,
UNIX_DIAG_RQLEN,
UNIX_DIAG_MAX,
};
/*
 * Payload of the UNIX_DIAG_VFS attribute: the inode and device of the
 * file the socket is bound to. The collector compares them against
 * stat() of the socket name.
 */
struct unix_diag_vfs {
__u32 udiag_vfs_ino;
__u32 udiag_vfs_dev;
};
#endif
#include <linux/types.h>
#include <linux/netlink.h>
#include <string.h>
#include "libnetlink.h"
#include "util.h"
int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
while (RTA_OK(rta, len)) {
if ((rta->rta_type <= max) && (!tb[rta->rta_type]))
tb[rta->rta_type] = rta;
rta = RTA_NEXT(rta,len);
}
if (len)
pr_warning("Trimmed RTA: len %d, rta_len %d\n", len, rta->rta_len);
return 0;
}
int nlmsg_receive(char *buf, int len, int (*cb)(struct nlmsghdr *))
{
struct nlmsghdr *hdr;
for (hdr = (struct nlmsghdr *)buf; NLMSG_OK(hdr, len); hdr = NLMSG_NEXT(hdr, len)) {
if (hdr->nlmsg_seq != 24680)
continue;
if (hdr->nlmsg_type == NLMSG_DONE)
return 0;
if (hdr->nlmsg_type == NLMSG_ERROR) {
pr_err("Error getting scokets list\n");
return -1;
}
if (cb(hdr))
return -1;
}
return 1;
}
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/un.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <netinet/tcp.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include "types.h"
#include "libnetlink.h"
#include "sockets.h"
#include "unix_diag.h"
#include "util.h"
#include "image.h"
#include "crtools.h"
/* When defined, the show_one_unix*() and unix_show_job() helpers print their info */
#define USK_DEBUG
/*
 * Scratch buffer shared by the functions of this file: it holds the
 * /proc path being checked, the received netlink data and the socket
 * name being shown.
 */
static char buf[4096];
/* Fallbacks for headers that do not provide the sock diag definitions */
#ifndef NETLINK_SOCK_DIAG
#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG
#endif
#ifndef SOCK_DIAG_BY_FAMILY
#define SOCK_DIAG_BY_FAMILY 20
#endif
/* Value statfs() f_type is compared against to recognize a socket */
#ifndef SOCKFS_MAGIC
#define SOCKFS_MAGIC 0x534F434B
#endif
/* Family-independent part of a collected socket; lives in the sockets hash */
struct socket_desc {
/* address family, AF_UNIX for unix sockets */
unsigned int family;
/* socket inode number, the hash key */
unsigned int ino;
/* next descriptor in the same hash chain */
struct socket_desc *next;
};
/* Description of a collected unix socket */
struct unix_sk_desc {
/* must stay first: dump_one_unix() casts a socket_desc pointer to this type */
struct socket_desc sd;
unsigned int type;
unsigned int state;
/* inode of the peer socket, zero when no peer attribute was reported */
unsigned int peer_ino;
/* value of the receive queue length attribute */
unsigned int rqlen;
/* length of name in bytes, zero when there is no name */
unsigned int namelen;
/* heap copy of the socket name with a trailing NUL, or NULL */
char *name;
/* heap copy of the pending connections attribute, terminated with a zero entry */
unsigned int *icons;
};
/* Collected sockets, hashed by inode number modulo SK_HASH_SIZE */
#define SK_HASH_SIZE 32
static struct socket_desc *sockets[SK_HASH_SIZE];
/*
 * Find a collected socket by its inode number.
 *
 * Returns the descriptor or NULL when no socket with this inode
 * has been collected.
 */
static struct socket_desc *lookup_socket(int ino)
{
	struct socket_desc *d;
	/*
	 * Inode numbers above INT_MAX arrive here as negative ints.
	 * Hash the unsigned value so the bucket index is never negative.
	 */
	unsigned int key = ino;

	for (d = sockets[key % SK_HASH_SIZE]; d != NULL; d = d->next)
		if (d->ino == key)
			break;

	return d;
}
static int sk_collect_one(int ino, int family, struct socket_desc *d)
{
d->ino = ino;
d->family = family;
d->next = sockets[ino % SK_HASH_SIZE];
sockets[ino % SK_HASH_SIZE] = d;
return 0;
}
/*
 * Debug helper: print a collected unix socket, prefixed with @act.
 * Does nothing unless USK_DEBUG is defined.
 */
static void show_one_unix(char *act, struct unix_sk_desc *sk)
{
#ifdef USK_DEBUG
	/* Unnamed sockets have no name buffer; never pass NULL to %s */
	const char *name = sk->name ? sk->name : "(null)";

	pr_info("\t%s: ino %d type %d state %d name %s\n", act,
		sk->sd.ino, sk->type, sk->state, name);
#endif
}
/*
 * Debug helper: print an image entry of a unix socket, prefixed
 * with @act. Compiled to a no-op unless USK_DEBUG is defined.
 */
static void show_one_unix_img(char *act, struct unix_sk_entry *e)
{
#ifndef USK_DEBUG
	(void)act;
	(void)e;
#else
	pr_info("\t%s: fd %d type %d state %d name %d bytes\n",
		act, e->fd, e->type, e->state, e->namelen);
#endif
}
/*
 * Tell whether a collected unix socket is in a shape we can dump.
 *
 * Returns 1 when the socket can be dumped and 0 (after logging the
 * reason) when it cannot.
 */
static int can_dump_unix_sk(struct unix_sk_desc *sk)
{
	const unsigned int state = sk->state;

	if (sk->type != SOCK_STREAM) {
		/*
		 * For datagram sockets the connect and accept jobs
		 * done at restore time have to be reworked first.
		 */
		pr_err("Only stream sockets for now\n");
		return 0;
	}

	if (state == TCP_LISTEN) {
		if (sk->rqlen != 0) {
			/*
			 * The ICONS attribute lists the connection
			 * requests pending on a listening socket. They
			 * should be picked up and the connect jobs
			 * adjusted accordingly.
			 */
			pr_err("In-flight connection (l)\n");
			return 0;
		}

		return 1;
	}

	if (state == TCP_ESTABLISHED) {
		if (!sk->peer_ino) {
			/* Same in-flight case, seen from the other end */
			pr_err("In-flight connection\n");
			return 0;
		}

		if (sk->rqlen) {
			/*
			 * The hard case: there is currently no way to
			 * clone the socket queue. MSG_PEEK does not help
			 * as it always picks the head of the queue.
			 */
			pr_err("Non empty queue\n");
			return 0;
		}

		return 1;
	}

	pr_err("Unknown state %d\n", sk->state);
	return 0;
}
#define USK_DEF_BACKLOG 16
/*
 * Write one unix socket into the unix sockets image.
 *
 * @_sk:      collected socket; it is the sd member of a unix_sk_desc
 * @fd:       descriptor number in its textual form
 * @cr_fdset: image files set, the CR_FD_UNIXSK one is written to
 *
 * The image gets the fixed entry followed by the socket name.
 * Returns 0 on success and -1 when the socket cannot be dumped or
 * a write fails.
 */
static int dump_one_unix(struct socket_desc *_sk, char *fd, struct cr_fdset *cr_fdset)
{
	struct unix_sk_desc *sk = (struct unix_sk_desc *)_sk;
	struct unix_sk_entry ue;

	if (!can_dump_unix_sk(sk))
		return -1;

	ue.fd = atoi(fd);
	ue.id = sk->sd.ino;
	ue.peer = sk->peer_ino;
	ue.type = sk->type;
	ue.state = sk->state;
	ue.pad = 0;
	ue.namelen = sk->namelen;
	ue.backlog = USK_DEF_BACKLOG; /* FIXME */

	/* Both write helpers jump to the err label on failure */
	write_ptr_safe(cr_fdset->desc[CR_FD_UNIXSK].fd, &ue, err);
	write_safe(cr_fdset->desc[CR_FD_UNIXSK].fd, sk->name, ue.namelen, err);

	pr_info("Dumping unix socket at %s\n", fd);
	show_one_unix("Dumping", sk);
	show_one_unix_img("Dumped", &ue);

	return 0;

err:
	return -1;
}
/*
 * Try to dump the descriptor @dir/@fd as a socket.
 *
 * @dir:      path of the directory holding the descriptor link
 * @fd:       descriptor number in its textual form
 * @cr_fdset: image files set to dump into
 *
 * Returns:
 *   0  - the descriptor is a socket and it has been dumped
 *   1  - the descriptor is not a socket, the caller should report
 *        its own error
 *   -1 - error
 */
int __try_dump_socket(char *dir, char *fd, struct cr_fdset *cr_fdset)
{
	struct statfs fst;
	struct stat st;
	struct socket_desc *sk;
	int n;

	/* buf is shared and fixed in size, so never write past its end */
	n = snprintf(buf, sizeof(buf), "%s/%s", dir, fd);
	if (n < 0 || n >= (int)sizeof(buf)) {
		pr_err("Path %s/%s is too long\n", dir, fd);
		return -1;
	}

	if (statfs(buf, &fst)) {
		pr_err("Can't statfs %s\n", buf);
		return -1;
	}

	if (stat(buf, &st)) {
		pr_err("Can't stat %s\n", buf);
		return -1;
	}

	if (fst.f_type != SOCKFS_MAGIC)
		return 1; /* not a socket, proceed with caller error */

	sk = lookup_socket(st.st_ino);
	if (sk == NULL) {
		/* st_ino is wider than int on some targets, print it as such */
		pr_err("Uncollected socket %lu\n", (unsigned long)st.st_ino);
		return -1;
	}

	switch (sk->family) {
	case AF_UNIX:
		return dump_one_unix(sk, fd, cr_fdset);
	default:
		pr_err("BUG! Unknown socket collected\n");
		return -1;
	}
}
/*
 * Build a descriptor for one unix socket reported by the kernel and
 * put it into the sockets hash.
 *
 * @m:  fixed part of the diag reply
 * @tb: attributes of the reply indexed by UNIX_DIAG_* type, NULL for
 *      the missing ones
 *
 * For a listening socket bound to a filesystem path the path is checked
 * against the reported inode and device; when they do not match the
 * name is dropped.
 *
 * Returns 0 on success and -1 on error.
 */
static int unix_collect_one(struct unix_diag_msg *m, struct rtattr **tb)
{
	/*
	 * The kernel reports its internal device number: the major in the
	 * bits above KDEV_MINORBITS and the minor below them. This differs
	 * from the st_dev encoding, so compare decoded major/minor pairs.
	 */
	enum { KDEV_MINORBITS = 20 };
	const unsigned int kdev_minormask = (1U << KDEV_MINORBITS) - 1;
	struct unix_sk_desc *d;

	d = xzalloc(sizeof(*d));
	if (d == NULL)
		return -1;

	d->type = m->udiag_type;
	d->state = m->udiag_state;

	if (tb[UNIX_DIAG_PEER])
		d->peer_ino = *(int *)RTA_DATA(tb[UNIX_DIAG_PEER]);

	if (tb[UNIX_DIAG_NAME]) {
		int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
		char *name;

		name = xmalloc(len + 1);
		if (!name)
			goto err;

		/* Owned by the descriptor from now on, so the err path frees it */
		d->name = name;

		memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
		name[len] = '\0';

		if (name[0] != '\0' && d->state == TCP_LISTEN) {
			struct unix_diag_vfs *uv;
			struct stat st;

			if (name[0] != '/') {
				pr_err("Relative bind path unsupported\n");
				goto err;
			}

			if (!tb[UNIX_DIAG_VFS]) {
				pr_err("Bound socket w/o inode %d\n",
						m->udiag_ino);
				goto err;
			}

			uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
			if (stat(name, &st)) {
				pr_perror("Can't stat socket %d(%s)\n",
						m->udiag_ino, name);
				goto err;
			}

			if ((st.st_ino != uv->udiag_vfs_ino) ||
			    (major(st.st_dev) != (uv->udiag_vfs_dev >> KDEV_MINORBITS)) ||
			    (minor(st.st_dev) != (uv->udiag_vfs_dev & kdev_minormask))) {
				/*
				 * The path now refers to another file, i.e.
				 * the listening socket is bound to an unlinked
				 * one. Drop the name, nobody can reach the
				 * socket through it anyway.
				 */
				xfree(name);
				len = 0;
				name = NULL;
			}
		}

		d->namelen = len;
		d->name = name;
	}

	if (tb[UNIX_DIAG_ICONS]) {
		int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]);

		/* One extra slot for the terminating zero entry */
		d->icons = xmalloc(len + sizeof(u32));
		if (!d->icons)
			goto err;

		memcpy(d->icons, RTA_DATA(tb[UNIX_DIAG_ICONS]), len);
		d->icons[len / sizeof(__u32)] = 0;
	}

	if (tb[UNIX_DIAG_RQLEN])
		d->rqlen = *(int *)RTA_DATA(tb[UNIX_DIAG_RQLEN]);

	show_one_unix("Collected", d);

	return sk_collect_one(m->udiag_ino, AF_UNIX, &d->sd);

err:
	xfree(d->icons);
	xfree(d->name);
	xfree(d);
	return -1;
}
/*
 * Callback for nlmsg_receive(): split one unix diag reply into its
 * fixed part and attributes and collect the socket it describes.
 *
 * Returns what unix_collect_one() returns.
 */
static int unix_receive_one(struct nlmsghdr *h)
{
	struct rtattr *attrs[UNIX_DIAG_MAX+1];
	struct unix_diag_msg *msg;
	struct rtattr *first;

	msg = NLMSG_DATA(h);
	/* The attributes start right after the fixed part of the reply */
	first = (struct rtattr *)(msg + 1);

	parse_rtattr(attrs, UNIX_DIAG_MAX, first,
			h->nlmsg_len - NLMSG_LENGTH(sizeof(*msg)));

	return unix_collect_one(msg, attrs);
}
/*
 * Ask the kernel for the list of unix sockets over the netlink
 * socket @nl and collect every socket reported.
 *
 * Returns 0 on success and -1 on error.
 */
static int collect_unix_sockets(int nl)
{
	struct {
		struct nlmsghdr hdr;
		struct unix_diag_req r;
	} req;
	struct sockaddr_nl nladdr;
	struct msghdr msg;
	struct iovec iov;

	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;

	/* The request: dump unix sockets in any state with all the details */
	memset(&req, 0, sizeof(req));
	req.hdr.nlmsg_len = sizeof(req);
	req.hdr.nlmsg_type = SOCK_DIAG_BY_FAMILY;
	req.hdr.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
	req.hdr.nlmsg_seq = 24680;
	req.r.sdiag_family = AF_UNIX;
	req.r.udiag_states = -1; /* All */
	req.r.udiag_show = UDIAG_SHOW_NAME | UDIAG_SHOW_VFS | UDIAG_SHOW_PEER | UDIAG_SHOW_ICONS | UDIAG_SHOW_RQLEN;

	iov.iov_base = &req;
	iov.iov_len = sizeof(req);

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &nladdr;
	msg.msg_namelen = sizeof(nladdr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(nl, &msg, 0) < 0) {
		pr_perror("Can't send request message\n");
		return -1;
	}

	/* Replies are received into the shared scratch buffer */
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	for (;;) {
		int ret;

		memset(&msg, 0, sizeof(msg));
		msg.msg_name = &nladdr;
		msg.msg_namelen = sizeof(nladdr);
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;

		ret = recvmsg(nl, &msg, 0);
		if (ret < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		if (ret == 0)
			return 0;

		/* 0 means end of dump, 1 means more data is to come */
		ret = nlmsg_receive(buf, ret, unix_receive_one);
		if (ret < 0)
			return -1;
		if (ret == 0)
			return 0;
	}
}
/*
 * Collect the sockets the kernel knows about. Only unix sockets
 * are collected.
 *
 * Returns 0 on success and -1 on error.
 */
int collect_sockets(void)
{
	int nl, err;

	nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	if (nl >= 0) {
		err = collect_unix_sockets(nl);
		close(nl);
		return err;
	}

	pr_err("Can't create sock diag socket\n");
	return -1;
}
/* A connect() call postponed until all the sockets are created */
struct unix_conn_job {
/* address to connect to and its length */
struct sockaddr_un addr;
int addrlen;
/* descriptor of the socket that does the connect */
int fd;
/* next job in the list */
struct unix_conn_job *next;
};
/*
 * Debug helper: print a connect/accept job event. Compiled to a
 * no-op unless USK_DEBUG is defined.
 */
static void unix_show_job(char *type, int fd, int id)
{
#ifndef USK_DEBUG
	(void)type;
	(void)fd;
	(void)id;
#else
	pr_info("%s job fd %d id %d\n", type, fd, id);
#endif
}
static struct unix_conn_job *conn_jobs;
/*
 * Run the scheduled connect jobs, freeing each one once it is done.
 *
 * A failed connect() is retried up to 8 times with a 1ms pause before
 * giving up.
 *
 * Returns 0 on success and -1 when a connection cannot be made. The
 * jobs not yet completed stay on the list in that case.
 */
static int run_connect_jobs(void)
{
	struct unix_conn_job *cj;

	while ((cj = conn_jobs) != NULL) {
		int attempts = 8;

		unix_show_job("Run conn", cj->fd, -1);

		while (connect(cj->fd, (struct sockaddr *)&cj->addr, cj->addrlen) < 0) {
			if (!attempts) {
				pr_perror("Can't restore connection (c)\n");
				return -1;
			}

			/* FIXME - use avagin@'s waiters */
			usleep(1000);
			attempts--;
		}

		unix_show_job("Fin conn", cj->fd, -1);

		/* Unlink before freeing so the list head never dangles */
		conn_jobs = cj->next;
		xfree(cj);
	}

	return 0;
}
/* An accept() call postponed until all the connect jobs are done */
struct unix_accept_job {
/* descriptor of the listening socket; the accepted one replaces it */
int fd;
/* next job in the list */
struct unix_accept_job *next;
};
static struct unix_accept_job *accept_jobs;
/*
 * Run the scheduled accept jobs, freeing each one once it is done.
 *
 * Every job accepts a connection on its temporary listening socket
 * and puts the accepted socket in place of the listening one.
 *
 * Returns 0 on success and -1 on error. The jobs not yet completed
 * stay on the list in that case.
 */
static int run_accept_jobs(void)
{
	struct unix_accept_job *aj;

	while ((aj = accept_jobs) != NULL) {
		int fd;

		unix_show_job("Run acc", aj->fd, -1);

		fd = accept(aj->fd, NULL, NULL);
		if (fd < 0) {
			pr_perror("Can't restore connection (s)\n");
			return -1;
		}

		/* The accepted socket replaces the listening one */
		if (dup2(fd, aj->fd) < 0) {
			pr_perror("Can't move accepted socket to fd %d\n", aj->fd);
			close(fd);
			return -1;
		}
		close(fd);

		unix_show_job("Fin acc", aj->fd, -1);

		/* Unlink before freeing so the list head never dangles */
		accept_jobs = aj->next;
		xfree(aj);
	}

	return 0;
}
/*
 * Build the abstract address used to re-establish a connection.
 *
 * @id:      id of the socket that plays the server role
 * @addr:    address to fill in
 * @addrlen: set to the length to pass to bind()/connect()
 *
 * The name is a NUL byte followed by "crtools-sk-" and the id printed
 * with the "%10d" format.
 */
static void prep_conn_addr(int id, struct sockaddr_un *addr, int *addrlen)
{
	int n;

	addr->sun_family = AF_UNIX;
	addr->sun_path[0] = '\0';
	n = snprintf(addr->sun_path + 1, sizeof(addr->sun_path) - 1,
			"crtools-sk-%10d", id);

	/*
	 * Count the leading NUL byte and every character printed.
	 * A shorter length cuts the last digits of the id off, making
	 * the names of different sockets collide.
	 */
	*addrlen = sizeof(addr->sun_family) + 1 + n;
}
static int open_unix_sk(struct unix_sk_entry *ue, int *img_fd)
{
int sk;
show_one_unix_img("Restore", ue);
sk = socket(PF_UNIX, ue->type, 0);
if (sk < 0) {
pr_perror("Can't create unix socket\n");
return -1;
}
if (ue->state == TCP_LISTEN) {
struct sockaddr_un addr;
int ret;
if (!ue->namelen || ue->namelen > UNIX_PATH_MAX) {
pr_err("Bad unix name len %d\n", ue->namelen);
goto err;
}
memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
ret = read(*img_fd, &addr.sun_path, ue->namelen);
if (ret != ue->namelen) {
pr_err("Error reading socket name from image (%d)", ret);
goto err;
}
if (addr.sun_path[0] != '\0')
unlink(addr.sun_path);
if (bind(sk, (struct sockaddr *)&addr,
sizeof(addr.sun_family) + ue->namelen) < 0) {
pr_perror("Can't bind socket\n");
goto err;
}
if (listen(sk, ue->backlog) < 0) {
pr_perror("Can't listen socket\n");
goto err;
}
} else if (ue->state == TCP_ESTABLISHED) {
if (ue->peer < ue->id) {
struct sockaddr_un addr;
int len;
struct unix_accept_job *aj;
/*
* Will become a server
*/
prep_conn_addr(ue->id, &addr, &len);
if (bind(sk, (struct sockaddr *)&addr, len) < 0) {
pr_perror("Can't bind socket\n");
goto err;
}
if (listen(sk, 1) < 0) {
pr_perror("Can't listen socket\n");
goto err;
}
aj = xmalloc(sizeof(*aj));
if (aj == NULL)
goto err;
aj->fd = ue->fd;
aj->next = accept_jobs;
accept_jobs = aj;
unix_show_job("Sched acc", ue->fd, ue->id);
} else {
struct unix_conn_job *cj;
/*
* Will do the connect
*/
cj = xmalloc(sizeof(*cj));
if (cj == NULL)
goto err;
prep_conn_addr(ue->peer, &cj->addr, &cj->addrlen);
cj->fd = ue->fd;
cj->next = conn_jobs;
conn_jobs = cj;
unix_show_job("Sched conn", ue->fd, ue->peer);
}
} else {
pr_err("Unknown state %d\n", ue->state);
goto err;
}
if (move_img_fd(img_fd, ue->fd))
return -1;
return reopen_fd_as(ue->fd, sk);
err:
close(sk);
return -1;
}
/*
 * Restore the unix sockets of a task from its unix sockets image.
 *
 * All the sockets are created first, then the connect jobs and the
 * accept jobs are run to re-establish the connections.
 *
 * Returns 0 on success and non-zero on error.
 */
static int prepare_unix_sockets(int pid)
{
	int usk_fd, ret;
	u32 type;

	usk_fd = open_image_ro(FMT_FNAME_UNIXSK, pid);
	if (usk_fd < 0) {
		pr_perror("%d: Can't open unix sk image\n", pid);
		return -1;
	}

	ret = read(usk_fd, &type, sizeof(type));
	if (ret < 0) {
		pr_perror("%d: Can't read unix sk image\n", pid);
		close(usk_fd);
		return -1;
	}

	/* A short read leaves type unset, so check the size first */
	if (ret != sizeof(type) || type != UNIXSK_MAGIC) {
		pr_err("%d: Bad unix sk file\n", pid);
		close(usk_fd);
		return -1;
	}

	while (1) {
		struct unix_sk_entry ue;

		ret = read(usk_fd, &ue, sizeof(ue));
		if (ret == 0)
			break;

		if (ret != sizeof(ue)) {
			pr_perror("%d: Bad unix sk entry (ret %d)\n", pid, ret);
			ret = -1;
			break;
		}

		/* usk_fd is passed by pointer and is what gets closed below */
		ret = open_unix_sk(&ue, &usk_fd);
		if (ret)
			break;
	}

	close(usk_fd);

	if (!ret)
		ret = run_connect_jobs();
	if (!ret)
		ret = run_accept_jobs();

	return ret;
}
/*
 * Restore the sockets of the task with the given pid. Only unix
 * sockets are handled.
 *
 * Returns the result of prepare_unix_sockets().
 */
int prepare_sockets(int pid)
{
	int ret;

	ret = prepare_unix_sockets(pid);

	return ret;
}
/*
 * Print the contents of a unix sockets image.
 *
 * @name:        name of the image, printed in the header
 * @fd:          descriptor to read the entries from
 * @show_header: whether to print the header and the closing line
 */
void show_unixsk(char *name, int fd, bool show_header)
{
	struct unix_sk_entry ue;

	if (show_header) {
		pr_info("\n");
		pr_info("CR_FD_UNIXSK: %s\n", name);
		pr_info("----------------------------------------\n");
	}

	while (1) {
		int ret = read_ptr_safe_eof(fd, &ue, out);
		if (!ret)
			goto out;

		pr_info("fd %d type %d state %d namelen %d peer %d\n",
				ue.fd, ue.type, ue.state, ue.namelen, ue.peer);

		if (!ue.namelen)
			continue;

		ret = read_safe_eof(fd, buf, ue.namelen, out);
		if (!ret)
			goto out;

		/*
		 * The name is stored without a terminator and buf is
		 * shared, so cut off whatever was left there before.
		 */
		buf[ue.namelen] = '\0';

		/* Show a name starting with a NUL byte with a leading '@' */
		if (!buf[0])
			buf[0] = '@';

		pr_info("\tname [%s]\n", buf);
	}

out:
	if (show_header)
		pr_info("----------------------------------------\n");
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment