Commit a14eedc2 authored by Kirill Tkhai, committed by Pavel Emelyanov

files: Kill struct file_desc_ops::post_open

This makes opening fds asynchronous. Now we restore them in a poll style.
Every ->open callback operates like a poll: it returns 1, i.e. "call me again",
if some events have not been received yet, or 0 if restoring of the fle
has finished.

We iterate over the list of file descriptors and look at each return value.
If there was no progress in restoring any fle, we sleep on the task_st
futex. As all events are sent together with a change of task_st, we will
be woken up when there is some new work for us.

v6: Add a comment about open method return values

v5: Clear FDS_EVENT at the beginning of every cycle.
    Use 0, -1 and 1 for successful return, error and "again" request.

v4: unix: 1)standalone sockets return ORV_AGAIN after first open
            to reopen fd by generic code
          2)do not call post_open_unix_sk() for !standalone sockets
    inet, epoll: return ORV_AGAIN after first open to reopen fd by generic code
    autofs: iterate used list instead of fds list
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 839078f8
......@@ -832,7 +832,7 @@ static struct fdinfo_list_entry *find_fle_by_fd(struct list_head *head, int fd)
{
struct fdinfo_list_entry *fle;
list_for_each_entry(fle, head, ps_list) {
list_for_each_entry(fle, head, used_list) {
if (fle->fe->fd == fd)
return fle;
}
......@@ -848,7 +848,7 @@ static struct fdinfo_list_entry *autofs_pipe_le(struct pstree_item *master,
if (entry->has_read_fd)
pipe_fd = entry->read_fd;
ple = find_fle_by_fd(&rsti(master)->fds, pipe_fd);
ple = find_fle_by_fd(&rsti(master)->used, pipe_fd);
if (!ple) {
pr_err("Failed to find pipe fd %d in process %d\n",
pipe_fd, master->pid->ns[0].virt);
......@@ -887,6 +887,22 @@ static int autofs_create_fle(struct pstree_item *task, FdinfoEntry *fe,
return 0;
}
/*
 * Poll-style ->open callback for the autofs pipe descriptor.
 *
 * While the master fle has not reached FLE_OPEN yet, the pipe is opened
 * via open_pipe(); on success the fds event is raised for the owning pid
 * and 1 is returned, so the callback gets invoked once more. Any non-zero
 * open_pipe() result is handed back to the caller untouched.
 *
 * Once the stage is FLE_OPEN or later, the call is forwarded to
 * autofs_post_open() with the fd number recorded in the master fle.
 */
static int autofs_open_pipefd(struct file_desc *d, int *new_fd)
{
	struct fdinfo_list_entry *master = file_master(d);
	int err;

	/* Already opened on an earlier pass: run the post-open part. */
	if (master->stage >= FLE_OPEN)
		return autofs_post_open(d, master->fe->fd);

	err = open_pipe(d, new_fd);
	if (err != 0)
		return err;

	set_fds_event(master->pid);
	return 1;
}
static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
struct fdinfo_list_entry *ple)
{
......@@ -903,7 +919,7 @@ static int autofs_create_pipe(struct pstree_item *task, autofs_info_t *i,
if (!ops)
return -1;
memcpy(ops, pi->d.ops, sizeof(*ops));
ops->post_open = autofs_post_open;
ops->open = autofs_open_pipefd;
pe = shmalloc(sizeof(*pe));
if (!pe)
......
......@@ -114,13 +114,19 @@ const struct fdtype_ops eventpoll_dump_ops = {
.dump = dump_one_eventpoll,
};
static int eventpoll_post_open(struct file_desc *d, int fd);
static int eventpoll_open(struct file_desc *d, int *new_fd)
{
struct fdinfo_list_entry *fle = file_master(d);
struct eventpoll_file_info *info;
int tmp;
info = container_of(d, struct eventpoll_file_info, d);
if (fle->stage >= FLE_OPEN)
return eventpoll_post_open(d, fle->fe->fd);
pr_info_eventpoll("Restore ", info->efe);
tmp = epoll_create(1);
......@@ -137,7 +143,7 @@ static int eventpoll_open(struct file_desc *d, int *new_fd)
}
*new_fd = tmp;
return 0;
return 1;
err_close:
close(tmp);
return -1;
......@@ -218,7 +224,6 @@ static void eventpoll_collect_fd(struct file_desc *d,
static struct file_desc_ops desc_ops = {
.type = FD_TYPES__EVENTPOLL,
.open = eventpoll_open,
.post_open = eventpoll_post_open,
.collect_fd = eventpoll_collect_fd,
};
......
......@@ -178,38 +178,6 @@ void wait_fds_event(void)
futex_wait_if_cond(f, FDS_EVENT, &);
clear_fds_event();
}
/*
* A file may be shared between several file descriptors. E.g
* when doing a fork() every fd of a forker and respective fds
* of the child have such. Another way of getting shared files
* is by dup()-ing them or sending them via unix sockets in
* SCM_RIGHTS message.
*
* We restore this type of things in 3 steps (states[] below)
*
* 1. Prepare step.
* Select which task will create the file (open() one, or
* call any other syscall for that (socket, pipe, etc.). All
* the others, that share one, create unix sockets under the
* respective file descriptor (transport socket).
* 2. Open step.
* The one who creates the file (the 'master') creates one,
* then creates one more unix socket (transport) and sends the
* created file over this socket to the other recipients.
* 3. Receive step.
* Those, who wait for the file to appear, receive one via
* the transport socket, then close the socket and dup() the
* received file descriptor into its place.
*
* There's the 4th step in the states[] array -- the post_open
* one. This one is not about file-sharing resolving, but about
* doing something with a file using its 'desired' fd. The
* thing is that while going through the 3-step process above, the
* file may appear in various places in the task's fd table, and if
* we want to do something with its _final_ descriptor value,
* we should wait for it to appear there. So the post_open is
* called when the file is finally set into its place.
*/
struct fdinfo_list_entry *file_master(struct file_desc *d)
{
......@@ -888,14 +856,7 @@ struct fd_open_state {
int (*cb)(int, struct fdinfo_list_entry *);
};
static int open_fd(int pid, struct fdinfo_list_entry *fle);
static int receive_fd(int pid, struct fdinfo_list_entry *fle);
static int post_open_fd(int pid, struct fdinfo_list_entry *fle);
static struct fd_open_state states[] = {
{ "create", open_fd, },
{ "post_create", post_open_fd, },
};
static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid)
{
......@@ -1004,29 +965,6 @@ static int send_fd_to_self(int fd, struct fdinfo_list_entry *fle)
return 0;
}
/*
 * "post_create" step for a single fle.
 *
 * A fle that is not the master of its descriptor first receives the file
 * with receive_fd(); for such a fle the ->post_open callback is run only
 * when its fd is the CTL_TTY_OFF service fd. The master fle always runs
 * ->post_open, provided the descriptor ops define one.
 *
 * Returns 0 and marks the fle FLE_RESTORED on success, -1 when either
 * receive_fd() or ->post_open reports a failure.
 */
static int post_open_fd(int pid, struct fdinfo_list_entry *fle)
{
	struct file_desc *desc = fle->desc;
	int need_post_open = 1;

	if (fle != file_master(desc)) {
		if (receive_fd(pid, fle) != 0) {
			pr_err("Can't receive\n");
			return -1;
		}

		/* Slave fles skip ->post_open unless they sit on the service fd. */
		need_post_open = !!is_service_fd(fle->fe->fd, CTL_TTY_OFF);
	}

	if (need_post_open && desc->ops->post_open &&
	    desc->ops->post_open(desc, fle->fe->fd))
		return -1;

	fle->stage = FLE_RESTORED;
	return 0;
}
static int serve_out_fd(int pid, int fd, struct file_desc *d)
{
int ret;
......@@ -1051,16 +989,10 @@ out:
return ret;
}
static int open_fd(int pid, struct fdinfo_list_entry *fle)
static int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd)
{
struct file_desc *d = fle->desc;
int new_fd;
if (fle != file_master(d))
return 0;
if (d->ops->open(d, &new_fd) < 0)
return -1;
pid_t pid = fle->pid;
if (reopen_fd_as(fle->fe->fd, new_fd))
return -1;
......@@ -1073,7 +1005,50 @@ static int open_fd(int pid, struct fdinfo_list_entry *fle)
BUG_ON(fle->stage != FLE_INITIALIZED);
fle->stage = FLE_OPEN;
return serve_out_fd(pid, fle->fe->fd, d);
if (serve_out_fd(pid, fle->fe->fd, d))
return -1;
return 0;
}
/*
 * Make one restore attempt for a single fle.
 *
 * A fle that is not the master of its descriptor must still be in the
 * FLE_INITIALIZED stage; it obtains the file through receive_fd(), whose
 * non-zero result is returned as is, and becomes FLE_RESTORED when
 * receive_fd() returns 0.
 *
 * The master fle calls the descriptor's ->open method and returns its
 * result (see the comment in the body for the meaning of the values).
 */
static int open_fd(int pid, struct fdinfo_list_entry *fle)
{
	struct file_desc *desc = fle->desc;
	struct fdinfo_list_entry *master = file_master(desc);
	int new_fd = -1;
	int rc;

	if (fle == master) {
		/*
		 * The open method returns the following values:
		 *  0 -- restore has finished successfully;
		 *  1 -- restore is in progress or can't be started
		 *       yet because it depends on other fles, so
		 *       the method should be called once again;
		 * -1 -- restore failed.
		 * For 0 and 1 return values, new_fd may be
		 * non-negative. In that case it holds a newly
		 * opened file descriptor, which may be served out.
		 * For every fle, new_fd is populated only once.
		 * See the BUG_ON in setup_and_serve_out() for details.
		 */
		rc = desc->ops->open(desc, &new_fd);
		if (rc != -1 && new_fd >= 0 &&
		    setup_and_serve_out(fle, new_fd) < 0)
			return -1;

		if (rc == 0)
			fle->stage = FLE_RESTORED;
		return rc;
	}

	/* Not the master: wait for the file to be sent to us. */
	BUG_ON(fle->stage != FLE_INITIALIZED);
	rc = receive_fd(pid, fle);
	if (rc != 0)
		return rc;

	fle->stage = FLE_RESTORED;
	return 0;
}
static int receive_fd(int pid, struct fdinfo_list_entry *fle)
......@@ -1097,25 +1072,49 @@ static int receive_fd(int pid, struct fdinfo_list_entry *fle)
return 0;
}
/*
 * Log the fd being restored together with the name of the state it is
 * entering, then run that state's callback from states[] and return
 * whatever the callback returns.
 */
static int open_fdinfo(int pid, struct fdinfo_list_entry *fle, int state)
{
	int target_fd = fle->fe->fd;

	pr_info("\tRestoring fd %d (state -> %s)\n",
			target_fd, states[state].name);

	return states[state].cb(pid, fle);
}
static int open_fdinfos(int pid, struct list_head *list)
{
int state, ret = 0;
struct fdinfo_list_entry *fle;
for (state = 0; state < ARRAY_SIZE(states); state++) {
list_for_each_entry(fle, list, ps_list) {
ret = open_fdinfo(pid, fle, state);
if (ret)
break;
struct fdinfo_list_entry *fle, *tmp, *service_fle = NULL;
LIST_HEAD(completed);
bool progress, again;
int st, ret = 0;
do {
progress = again = false;
clear_fds_event();
list_for_each_entry_safe(fle, tmp, list, ps_list) {
st = fle->stage;
BUG_ON(st == FLE_RESTORED);
ret = open_fd(pid, fle);
if (ret == -1)
goto splice;
if (st != fle->stage || ret == 0)
progress = true;
if (ret == 0) {
/*
* We delete restored items from fds list,
* so open() methods may base on this feature
* and reduce number of fles in their checks.
*/
list_del(&fle->ps_list);
list_add(&fle->ps_list, &completed);
}
if (ret == 1)
again = true;
if (fle->fe->fd == get_service_fd(CTL_TTY_OFF))
service_fle = fle;
}
}
if (!progress && again)
wait_fds_event();
} while (again || progress);
BUG_ON(!list_empty(list));
splice:
list_splice(&completed, list);
if (ret == 0 && service_fle)
ret = tty_restore_ctl_terminal(service_fle->desc, service_fle->fe->fd);
return ret;
}
......
......@@ -105,11 +105,6 @@ struct file_desc_ops {
* so it shouldn't be saved for any post-actions.
*/
int (*open)(struct file_desc *d, int *new_fd);
/*
* Called on a file when all files of that type are opened
* and with the fd being the "restored" one.
*/
int (*post_open)(struct file_desc *d, int fd);
/*
* Called to collect a new fd before adding it on desc. Clients
* may chose to collect it to some specific rst_info list. See
......
......@@ -32,6 +32,8 @@ extern int prepare_shared_tty(void);
extern int tty_prep_fds(void);
extern void tty_fini_fds(void);
extern int tty_restore_ctl_terminal(struct file_desc *d, int fd);
#define OPT_SHELL_JOB "shell-job"
#endif /* __CR_TTY_H__ */
......@@ -490,7 +490,6 @@ static int post_open_inet_sk(struct file_desc *d, int sk);
static struct file_desc_ops inet_desc_ops = {
.type = FD_TYPES__INETSK,
.open = open_inet_sk,
.post_open = post_open_inet_sk,
};
static inline int tcp_connection(InetSkEntry *ie)
......@@ -585,8 +584,8 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
if (ii->ie->opts->reuseaddr)
return 0;
while (atomic_read(&ii->port->users))
wait_fds_event();
if (atomic_read(&ii->port->users))
return 1;
val = ii->ie->opts->reuseaddr;
if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val))
......@@ -606,10 +605,14 @@ int restore_ip_opts(int sk, IpOptsEntry *ioe)
}
static int open_inet_sk(struct file_desc *d, int *new_fd)
{
struct fdinfo_list_entry *fle = file_master(d);
struct inet_sk_info *ii;
InetSkEntry *ie;
int sk, yes = 1;
if (fle->stage >= FLE_OPEN)
return post_open_inet_sk(d, fle->fe->fd);
ii = container_of(d, struct inet_sk_info, d);
ie = ii->ie;
......@@ -702,8 +705,7 @@ done:
goto err;
*new_fd = sk;
return 0;
return 1;
err:
close(sk);
return -1;
......
......@@ -902,8 +902,8 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
/* Skip external sockets */
if (!list_empty(&peer->d.fd_info_head))
while (peer_is_not_prepared(peer))
wait_fds_event();
if (peer_is_not_prepared(peer))
return 1;
if (ui->ue->uflags & USK_INHERIT)
return 0;
......@@ -1269,14 +1269,19 @@ out:
return -1;
*new_fd = sk;
return 0;
return 1;
}
static int open_unix_sk(struct file_desc *d, int *new_fd)
{
struct fdinfo_list_entry *fle;
struct unix_sk_info *ui;
int ret;
fle = file_master(d);
if (fle->stage >= FLE_OPEN)
return post_open_unix_sk(d, fle->fe->fd);
ui = container_of(d, struct unix_sk_info, d);
if (inherited_fd(d, new_fd)) {
......@@ -1310,7 +1315,6 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s)
static struct file_desc_ops unix_desc_ops = {
.type = FD_TYPES__UNIXSK,
.open = open_unix_sk,
.post_open = post_open_unix_sk,
.name = socket_d_name,
};
......
......@@ -659,7 +659,7 @@ static int tty_set_prgp(int fd, int group)
return 0;
}
static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
int tty_restore_ctl_terminal(struct file_desc *d, int fd)
{
struct tty_info *info = container_of(d, struct tty_info, d);
struct tty_driver *driver = info->driver;
......@@ -667,8 +667,7 @@ static int tty_restore_ctl_terminal(struct file_desc *d, int fd)
struct file_desc *slave_d;
int slave = -1, ret = -1, index = -1;
if (!is_service_fd(fd, CTL_TTY_OFF))
return 0;
BUG_ON(!is_service_fd(fd, CTL_TTY_OFF));
if (driver->type == TTY_TYPE__EXT_TTY) {
slave = -1;
......@@ -708,7 +707,7 @@ out:
err:
pty_free_fake_reg(&fake);
close(fd);
return ret;
return ret ? -1 : 0;
}
static bool tty_is_master(struct tty_info *info)
......@@ -1152,7 +1151,6 @@ static char *tty_d_name(struct file_desc *d, char *buf, size_t s)
static struct file_desc_ops tty_desc_ops = {
.type = FD_TYPES__TTY,
.open = tty_open,
.post_open = tty_restore_ctl_terminal,
.collect_fd = tty_collect_fd,
.name = tty_d_name,
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment