Commit ebc3421d authored by Cyrill Gorcunov's avatar Cyrill Gorcunov Committed by Pavel Emelyanov

tty: Write unread pty buffers on post dump stage

Updated version attached.

>From 6c0e1522e01e01aa89861862fbdf039a0892b89b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 12 Apr 2016 20:00:24 +0300
Subject: [PATCH 1/2] tty: Write unread pty buffers on post dump stage

When unread data present on peers we currently simply ignore it but
actually we can try to fetch it in non(that)destructive way.

For this sake at the end of dump procedure (because fetching
queued data may go wrong and we will have to write it back,
which is heavy, and we need all ttys under our hands)
we walk over all collected TTYs and link PTYs peers which
indices are matching. Note to not overload tty_dump_info we
reuse @list member for new @all_ptys list.

Once link established we literally read queued data and flush
it into new tty-data.img. If something go wrong at this moment,
we stop reading queued data but walk back over already queued
ones and write them back to restore former state. Same applies
if the dump has been requested to leave task alive.

On restore we link peers back and write queued data once
peer back to live.
Signed-off-by: 's avatarCyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: 's avatarPavel Emelyanov <xemul@virtuozzo.com>
parent 4497ac8e
...@@ -1746,7 +1746,7 @@ int cr_dump_tasks(pid_t pid) ...@@ -1746,7 +1746,7 @@ int cr_dump_tasks(pid_t pid)
if (ret) if (ret)
goto err; goto err;
ret = tty_verify_active_pairs(); ret = tty_post_actions();
if (ret) if (ret)
goto err; goto err;
......
...@@ -176,6 +176,7 @@ static struct collect_image_info *cinfos[] = { ...@@ -176,6 +176,7 @@ static struct collect_image_info *cinfos[] = {
&fanotify_mark_cinfo, &fanotify_mark_cinfo,
&tty_info_cinfo, &tty_info_cinfo,
&tty_cinfo, &tty_cinfo,
&tty_cdata,
&tunfile_cinfo, &tunfile_cinfo,
&ext_file_cinfo, &ext_file_cinfo,
&timerfd_cinfo, &timerfd_cinfo,
...@@ -243,9 +244,6 @@ static int root_prepare_shared(void) ...@@ -243,9 +244,6 @@ static int root_prepare_shared(void)
return -1; return -1;
} }
if (tty_verify_active_pairs())
return -1;
for_each_pstree_item(pi) { for_each_pstree_item(pi) {
if (pi->pid.state == TASK_HELPER) if (pi->pid.state == TASK_HELPER)
continue; continue;
......
...@@ -81,6 +81,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { ...@@ -81,6 +81,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
FD_ENTRY(BINFMT_MISC, "binfmt-misc-%d"), FD_ENTRY(BINFMT_MISC, "binfmt-misc-%d"),
FD_ENTRY(TTY_FILES, "tty"), FD_ENTRY(TTY_FILES, "tty"),
FD_ENTRY(TTY_INFO, "tty-info"), FD_ENTRY(TTY_INFO, "tty-info"),
FD_ENTRY_F(TTY_DATA, "tty-data", O_NOBUF),
FD_ENTRY(FILE_LOCKS, "filelocks"), FD_ENTRY(FILE_LOCKS, "filelocks"),
FD_ENTRY(RLIMIT, "rlimit-%d"), FD_ENTRY(RLIMIT, "rlimit-%d"),
FD_ENTRY_F(PAGES, "pages-%u", O_NOBUF), FD_ENTRY_F(PAGES, "pages-%u", O_NOBUF),
......
...@@ -69,6 +69,7 @@ enum { ...@@ -69,6 +69,7 @@ enum {
CR_FD_FIFO_DATA, CR_FD_FIFO_DATA,
CR_FD_TTY_FILES, CR_FD_TTY_FILES,
CR_FD_TTY_INFO, CR_FD_TTY_INFO,
CR_FD_TTY_DATA,
CR_FD_REMAP_FPATH, CR_FD_REMAP_FPATH,
CR_FD_EVENTFD_FILE, CR_FD_EVENTFD_FILE,
CR_FD_EVENTPOLL_FILE, CR_FD_EVENTPOLL_FILE,
......
...@@ -76,6 +76,7 @@ ...@@ -76,6 +76,7 @@
#define NETNS_MAGIC 0x55933752 /* Dolgoprudny */ #define NETNS_MAGIC 0x55933752 /* Dolgoprudny */
#define TTY_FILES_MAGIC 0x59433025 /* Pushkin */ #define TTY_FILES_MAGIC 0x59433025 /* Pushkin */
#define TTY_INFO_MAGIC 0x59453036 /* Kolpino */ #define TTY_INFO_MAGIC 0x59453036 /* Kolpino */
#define TTY_DATA_MAGIC 0x59413026 /* Pavlovsk */
#define FILE_LOCKS_MAGIC 0x54323616 /* Kaluga */ #define FILE_LOCKS_MAGIC 0x54323616 /* Kaluga */
#define RLIMIT_MAGIC 0x57113925 /* Rostov */ #define RLIMIT_MAGIC 0x57113925 /* Rostov */
#define FANOTIFY_FILE_MAGIC 0x55096122 /* Chelyabinsk */ #define FANOTIFY_FILE_MAGIC 0x55096122 /* Chelyabinsk */
......
...@@ -57,6 +57,7 @@ enum { ...@@ -57,6 +57,7 @@ enum {
PB_USERNS, PB_USERNS,
PB_NETNS, PB_NETNS,
PB_BINFMT_MISC, /* 50 */ PB_BINFMT_MISC, /* 50 */
PB_TTY_DATA,
/* PB_AUTOGEN_STOP */ /* PB_AUTOGEN_STOP */
......
...@@ -22,13 +22,13 @@ static inline int is_tty(dev_t rdev, dev_t dev) ...@@ -22,13 +22,13 @@ static inline int is_tty(dev_t rdev, dev_t dev)
return get_tty_driver(rdev, dev) != NULL; return get_tty_driver(rdev, dev) != NULL;
} }
extern int tty_post_actions(void);
extern int dump_verify_tty_sids(void); extern int dump_verify_tty_sids(void);
extern struct collect_image_info tty_info_cinfo; extern struct collect_image_info tty_info_cinfo;
extern struct collect_image_info tty_cinfo; extern struct collect_image_info tty_cinfo;
extern struct collect_image_info tty_cdata;
extern int prepare_shared_tty(void); extern int prepare_shared_tty(void);
extern int tty_verify_active_pairs(void);
extern int tty_prep_fds(void); extern int tty_prep_fds(void);
extern void tty_fini_fds(void); extern void tty_fini_fds(void);
......
...@@ -78,6 +78,11 @@ struct tty_info_entry { ...@@ -78,6 +78,11 @@ struct tty_info_entry {
TtyInfoEntry *tie; TtyInfoEntry *tie;
}; };
struct tty_data_entry {
struct list_head list;
TtyDataEntry *tde;
};
struct tty_info { struct tty_info {
struct list_head list; struct list_head list;
struct file_desc d; struct file_desc d;
...@@ -94,6 +99,8 @@ struct tty_info { ...@@ -94,6 +99,8 @@ struct tty_info {
bool inherit; bool inherit;
struct tty_info *ctl_tty; struct tty_info *ctl_tty;
struct tty_info *link;
struct tty_data_entry *tty_data;
}; };
struct tty_dump_info { struct tty_dump_info {
...@@ -104,6 +111,13 @@ struct tty_dump_info { ...@@ -104,6 +111,13 @@ struct tty_dump_info {
pid_t pgrp; pid_t pgrp;
int fd; int fd;
struct tty_driver *driver; struct tty_driver *driver;
int index;
int lfd;
int flags;
struct tty_dump_info *link;
void *tty_data;
size_t tty_data_size;
}; };
static LIST_HEAD(all_tty_info_entries); static LIST_HEAD(all_tty_info_entries);
...@@ -346,7 +360,7 @@ static int tty_get_index(u32 id) ...@@ -346,7 +360,7 @@ static int tty_get_index(u32 id)
} }
/* Make sure the active pairs do exist */ /* Make sure the active pairs do exist */
int tty_verify_active_pairs(void) static int tty_verify_active_pairs(void * unused)
{ {
unsigned long i, unpaired_slaves = 0; unsigned long i, unpaired_slaves = 0;
...@@ -801,6 +815,28 @@ static int restore_tty_params(int fd, struct tty_info *info) ...@@ -801,6 +815,28 @@ static int restore_tty_params(int fd, struct tty_info *info)
return userns_call(do_restore_tty_parms, UNS_ASYNC, &p, sizeof(p), fd); return userns_call(do_restore_tty_parms, UNS_ASYNC, &p, sizeof(p), fd);
} }
/*
* When we restore queued data we don't exit if error happened:
* the terminals never was a transport with guaranted delivery,
* it's up to application which uses it to guaratee the data
* integrity.
*/
static void pty_restore_queued_data(struct tty_info *info, int fd)
{
if (info && info->tty_data) {
ProtobufCBinaryData bd = info->tty_data->tde->data;
int retval;
pr_debug("restore queued data on %#x (%zu bytes)\n",
info->tfe->id, (size_t)bd.len);
retval = write(fd, bd.data, bd.len);
if (retval != bd.len)
pr_err("Restored %d bytes while %zu expected\n",
retval, (size_t)bd.len);
}
}
static int pty_open_slaves(struct tty_info *info) static int pty_open_slaves(struct tty_info *info)
{ {
int sock = -1, fd = -1, ret = -1; int sock = -1, fd = -1, ret = -1;
...@@ -835,6 +871,7 @@ static int pty_open_slaves(struct tty_info *info) ...@@ -835,6 +871,7 @@ static int pty_open_slaves(struct tty_info *info)
goto err; goto err;
} }
pty_restore_queued_data(slave->link, fd);
close(fd); close(fd);
fd = -1; fd = -1;
} }
...@@ -975,6 +1012,8 @@ static int pty_open_ptmx(struct tty_info *info) ...@@ -975,6 +1012,8 @@ static int pty_open_ptmx(struct tty_info *info)
if (pty_open_slaves(info)) if (pty_open_slaves(info))
goto err; goto err;
pty_restore_queued_data(info->link, master);
if (info->tie->locked) if (info->tie->locked)
lock_pty(master); lock_pty(master);
...@@ -1238,6 +1277,28 @@ static int tty_setup_slavery(void * unused) ...@@ -1238,6 +1277,28 @@ static int tty_setup_slavery(void * unused)
{ {
struct tty_info *info, *peer, *m; struct tty_info *info, *peer, *m;
/*
* Setup links for PTY terminal pairs by
* their indices, queued data already bound
* to them by data ids.
*/
list_for_each_entry(info, &all_ttys, list) {
if (!is_pty(info->driver) || info->link)
continue;
peer = info;
list_for_each_entry_continue(peer, &all_ttys, list) {
if (!is_pty(peer->driver) || peer->link)
continue;
if (peer->tie->pty->index == info->tie->pty->index) {
info->link = peer;
peer->link = info;
pr_debug("Link PTYs (%#x)\n", info->tfe->id);
break;
}
}
}
/* /*
* The image may carry several terminals opened * The image may carry several terminals opened
* belonging to the same session, so choose the * belonging to the same session, so choose the
...@@ -1430,6 +1491,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i) ...@@ -1430,6 +1491,8 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
info->create = tty_is_master(info); info->create = tty_is_master(info);
info->inherit = false; info->inherit = false;
info->ctl_tty = NULL; info->ctl_tty = NULL;
info->tty_data = NULL;
info->link = NULL;
if (verify_info(info)) if (verify_info(info))
return -1; return -1;
...@@ -1466,14 +1529,12 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i) ...@@ -1466,14 +1529,12 @@ static int collect_one_tty(void *obj, ProtobufCMessage *msg, struct cr_img *i)
pr_info("Collected tty ID %#x (%s)\n", info->tfe->id, info->driver->name); pr_info("Collected tty ID %#x (%s)\n", info->tfe->id, info->driver->name);
if (list_empty(&all_ttys)) if (list_empty(&all_ttys)) {
/* if (add_post_prepare_cb(tty_verify_active_pairs, NULL))
* XXX -- not every tty requires this. return -1;
* Check that we have such here and queue
* post-cb only if required.
*/
if (add_post_prepare_cb(tty_setup_slavery, NULL)) if (add_post_prepare_cb(tty_setup_slavery, NULL))
return -1; return -1;
}
list_add(&info->list, &all_ttys); list_add(&info->list, &all_ttys);
return file_desc_add(&info->d, info->tfe->id, &tty_desc_ops); return file_desc_add(&info->d, info->tfe->id, &tty_desc_ops);
...@@ -1486,6 +1547,33 @@ struct collect_image_info tty_cinfo = { ...@@ -1486,6 +1547,33 @@ struct collect_image_info tty_cinfo = {
.collect = collect_one_tty, .collect = collect_one_tty,
}; };
static int collect_one_tty_data(void *obj, ProtobufCMessage *msg, struct cr_img *i)
{
struct tty_data_entry *tdo = obj;
struct tty_info *info;
tdo->tde = pb_msg(msg, TtyDataEntry);
pr_debug("Collected data for id %#x (size %zu bytes)\n",
tdo->tde->tty_id, (size_t)tdo->tde->data.len);
list_for_each_entry(info, &all_ttys, list) {
if (tdo->tde->tty_id == info->tie->id) {
info->tty_data = tdo;
return 0;
}
}
pr_err("No tty found to queued data on id %#x\n", tdo->tde->tty_id);
return -ENOENT;
}
struct collect_image_info tty_cdata = {
.fd_type = CR_FD_TTY_DATA,
.pb_type = PB_TTY_DATA,
.priv_size = sizeof(struct tty_data_entry),
.collect = collect_one_tty_data,
};
/* Make sure the ttys we're dumping do belong our process tree */ /* Make sure the ttys we're dumping do belong our process tree */
int dump_verify_tty_sids(void) int dump_verify_tty_sids(void)
{ {
...@@ -1531,7 +1619,6 @@ int dump_verify_tty_sids(void) ...@@ -1531,7 +1619,6 @@ int dump_verify_tty_sids(void)
} }
} }
} }
xfree(dinfo);
} }
return ret; return ret;
...@@ -1564,7 +1651,7 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d ...@@ -1564,7 +1651,7 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
if (!pti) if (!pti)
return -1; return -1;
dinfo = xmalloc(sizeof(*dinfo)); dinfo = xzalloc(sizeof(*dinfo));
if (!dinfo) if (!dinfo)
return -1; return -1;
...@@ -1574,6 +1661,20 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d ...@@ -1574,6 +1661,20 @@ static int dump_tty_info(int lfd, u32 id, const struct fd_parms *p, struct tty_d
dinfo->fd = p->fd; dinfo->fd = p->fd;
dinfo->driver = driver; dinfo->driver = driver;
if (is_pty(driver)) {
dinfo->lfd = dup(lfd);
if (dinfo->lfd < 0) {
pr_perror("Can't dup local fd on %x", id);
xfree(dinfo);
return -1;
}
dinfo->index = index;
dinfo->flags = p->flags;
} else {
dinfo->index = -1;
dinfo->lfd = -1;
}
list_add_tail(&dinfo->list, &all_ttys); list_add_tail(&dinfo->list, &all_ttys);
info.id = id; info.id = id;
...@@ -1711,6 +1812,214 @@ const struct fdtype_ops tty_dump_ops = { ...@@ -1711,6 +1812,214 @@ const struct fdtype_ops tty_dump_ops = {
.dump = dump_one_tty, .dump = dump_one_tty,
}; };
static int tty_reblock(int id, int lfd, int flags)
{
static const int fmask = O_RDWR | O_NONBLOCK;
int ret;
if ((flags & fmask) != fmask) {
if (fcntl(lfd, F_SETFL, flags)) {
ret = -errno;
pr_perror("Can't revert mode back to %o on (%#x)\n", fmask, id);
return ret;
}
}
return 0;
}
static int tty_unblock(int id, int lfd, int flags)
{
static const int fmask = O_RDWR | O_NONBLOCK;
int ret;
if ((flags & fmask) != fmask) {
if (fcntl(lfd, F_SETFL, fmask)) {
ret = -errno;
pr_perror("Can't change mode to %o on (%#x)\n", fmask, id);
return ret;
}
}
return 0;
}
static int tty_do_dump_queued_data(struct tty_dump_info *dinfo)
{
TtyDataEntry e = TTY_DATA_ENTRY__INIT;
size_t off = 0, size = 16384;
char *buf;
int ret;
buf = xmalloc(size);
if (!buf)
return -ENOMEM;
ret = tty_unblock(dinfo->id, dinfo->lfd, dinfo->flags);
if (ret) {
xfree(buf);
return ret;
}
while (1) {
ret = read(dinfo->lfd, &buf[off], size - off);
if (ret == 0) {
pr_debug("No more data on tty (%s %#x)\n",
dinfo->driver->name, dinfo->id);
break;
} else if (ret < 0) {
if (errno == EAGAIN) {
pr_debug("Not waiting data tty (%s %#x)\n",
dinfo->driver->name, dinfo->id);
break;
} else {
ret = -errno;
pr_perror("Can't read data from tty (%s %#x)",
dinfo->driver->name, dinfo->id);
xfree(buf);
return ret;
}
}
off += ret;
pr_debug("Read %d bytes (%d) from tty (%s %#x)\n",
ret, (int)off, dinfo->driver->name, dinfo->id);
if (off >= size) {
pr_err("The tty (%s %#x) queued data overrflow %zu bytes limit\n",
dinfo->driver->name, dinfo->id, size);
off = size;
break;
}
}
if (off) {
dinfo->tty_data = buf;
dinfo->tty_data_size = off;
e.tty_id = dinfo->id;
e.data.data = (void *)buf;
e.data.len = off;
ret = pb_write_one(img_from_set(glob_imgset, CR_FD_TTY_DATA),
&e, PB_TTY_DATA);
} else {
xfree(buf);
ret = 0;
}
return ret;
}
/*
* If error happens here, so be it, ttys are not delivering
* data with guaranteed results.
*/
static void __tty_do_writeback_queued_data(struct tty_dump_info *dinfo)
{
if (write(dinfo->link->lfd, dinfo->tty_data,
dinfo->tty_data_size) != dinfo->tty_data_size)
pr_perror("Can't writeback to tty (%#x)\n", dinfo->id);
tty_reblock(dinfo->link->id, dinfo->link->lfd, dinfo->link->flags);
}
static void tty_do_writeback_queued_data(struct tty_dump_info *dinfo)
{
if (dinfo->tty_data)
__tty_do_writeback_queued_data(dinfo);
if (dinfo->link->tty_data)
__tty_do_writeback_queued_data(dinfo->link);
}
static void tty_dinfo_free(struct tty_dump_info *dinfo)
{
list_del(&dinfo->list);
close_safe(&dinfo->lfd);
xfree(dinfo->tty_data);
xfree(dinfo);
}
/*
* Dumping queued data must be done at the very end of the
* checkpoint procedure -- it's tail optimization, we trying
* to defer this procedure until everything else passed
* succesfully because in real it is time consuming on
* its own which might require writting data back to the
* former peers if case something go wrong.
*
* Moreover when we gather PTYs peers into own list we
* do it in destructive way -- the former @all_ttys
* list get modified (one of the peer get moved from
* @all_ttys to @all_ptys list) because otherwise we
* will have to add one more entry into tty_dump_info,
* thus we simply reuse the @list entry for own needs.
*/
static int tty_dump_queued_data(void)
{
struct tty_dump_info *dinfo, *peer, *n;
LIST_HEAD(all_ptys);
int ret = 0;
/*
* Link PTY peers, and move one of linked
* into separate list.
*/
list_for_each_entry_safe(dinfo, n, &all_ttys, list) {
if (!is_pty(dinfo->driver) || dinfo->link)
continue;
peer = dinfo;
list_for_each_entry_continue(peer, &all_ttys, list) {
if (!is_pty(peer->driver) || peer->link)
continue;
if (peer->index == dinfo->index) {
dinfo->link = peer;
peer->link = dinfo;
pr_debug("Link PTYs (%#x)\n", dinfo->id);
list_move(&dinfo->list, &all_ptys);
}
}
}
/*
* Once linked fetch the queued data if present.
*/
list_for_each_entry(dinfo, &all_ptys, list) {
ret = tty_do_dump_queued_data(dinfo);
if (ret)
break;
ret = tty_do_dump_queued_data(dinfo->link);
if (ret)
break;
}
if (ret || opts.final_state != TASK_DEAD) {
list_for_each_entry(dinfo, &all_ptys, list)
tty_do_writeback_queued_data(dinfo);
}
list_for_each_entry_safe(dinfo, n, &all_ptys, list) {
tty_dinfo_free(dinfo->link);
tty_dinfo_free(dinfo);
}
list_for_each_entry_safe(dinfo, n, &all_ttys, list)
tty_dinfo_free(dinfo);
return ret;
}
int tty_post_actions(void)
{
if (tty_verify_active_pairs(NULL))
return -1;
else if (tty_dump_queued_data())
return -1;
return 0;
}
int tty_prep_fds(void) int tty_prep_fds(void)
{ {
if (!opts.shell_job) if (!opts.shell_job)
......
...@@ -34,6 +34,11 @@ enum TtyType { ...@@ -34,6 +34,11 @@ enum TtyType {
SERIAL = 6; SERIAL = 6;
} }
message tty_data_entry {
required uint32 tty_id = 1;
required bytes data = 2;
}
message tty_info_entry { message tty_info_entry {
required uint32 id = 1; required uint32 id = 1;
......
...@@ -370,6 +370,7 @@ handlers = { ...@@ -370,6 +370,7 @@ handlers = {
'MNTS' : entry_handler(mnt_entry), 'MNTS' : entry_handler(mnt_entry),
'TTY_FILES' : entry_handler(tty_file_entry), 'TTY_FILES' : entry_handler(tty_file_entry),
'TTY_INFO' : entry_handler(tty_info_entry), 'TTY_INFO' : entry_handler(tty_info_entry),
'TTY_DATA' : entry_handler(tty_data_entry),
'RLIMIT' : entry_handler(rlimit_entry), 'RLIMIT' : entry_handler(rlimit_entry),
'TUNFILE' : entry_handler(tunfile_entry), 'TUNFILE' : entry_handler(tunfile_entry),
'EXT_FILES' : entry_handler(ext_file_entry), 'EXT_FILES' : entry_handler(ext_file_entry),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment