Commit dbaab31f authored by Gabriel Guimaraes, committed by Pavel Emelyanov

Workaround for the OverlayFS bug present before Kernel 4.2

This is here only to support the Linux Kernel between versions
3.18 and 4.2. After that, this workaround is not needed anymore,
but it will work properly on both a kernel with and without the bug.

The bug is that when a process has a file open in an OverlayFS directory,
the information in /proc/<pid>/fd/<fd> and /proc/<pid>/fdinfo/<fd>
is wrong, so we grab that information from the mountinfo table instead.

This is done every time fill_fdlink is called.
We first check to see if the mnt_id and st_dev numbers currently match
some entry in the mountinfo table. If so, we already have the correct mnt_id
and no fixup is needed.

Then we proceed to see if there are any overlayFS mounted directories
in the mountinfo table. If so, we concatenate the mountpoint with the
name of the file, and stat the resulting path to check if we found the
correct device id and node number. If that is the case, we update the
mount id and link variables with the correct values.
Signed-off-by: Gabriel Guimaraes <gabriellimaguimaraes@gmail.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent b9b0730c
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "eventfd.h" #include "eventfd.h"
#include "eventpoll.h" #include "eventpoll.h"
#include "fsnotify.h" #include "fsnotify.h"
#include "mount.h"
#include "signalfd.h" #include "signalfd.h"
#include "namespaces.h" #include "namespaces.h"
#include "tun.h" #include "tun.h"
...@@ -156,6 +157,56 @@ void show_saved_files(void) ...@@ -156,6 +157,56 @@ void show_saved_files(void)
} }
} }
/*
 * Workaround for the OverlayFS bug present before Kernel 4.2
 *
 * This is here only to support the Linux Kernel between versions
 * 3.18 and 4.2. After that, this workaround is not needed anymore,
 * but it will work properly on both a kernel with and without the bug.
 *
 * When a process has a file open in an OverlayFS directory,
 * the information in /proc/<pid>/fd/<fd> and /proc/<pid>/fdinfo/<fd>
 * is wrong. We can't even rely on stat()-ing /proc/<pid>/fd/<fd> since
 * this will show us the wrong filesystem type.
 *
 * So we grab that information from the mountinfo table instead. This is done
 * every time fill_fdlink is called. See lookup_overlayfs for more details.
 *
 * @p:    fd parameters; p->mnt_id is replaced with the id of the OverlayFS
 *        mount that really holds the file, when one is found.
 * @link: link filled by fill_fdlink; link->name (and link->len) are
 *        rewritten to include the mountpoint. May be NULL, in which case
 *        nothing is done.
 *
 * Returns 0 when no fixup was needed or the fixup succeeded, -1 when the
 * lookup failed or the fixed-up path does not fit in PATH_MAX. On failure
 * neither @p nor @link is modified.
 */
static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link)
{
	struct mount_info *m;

	if (!link)
		return 0;

	m = lookup_overlayfs(link->name, p->stat.st_dev, p->stat.st_ino, p->mnt_id);
	if (IS_ERR(m))
		return -1;

	if (!m)
		return 0;

	/*
	 * If the bug is present, the file path from /proc/<pid>/fd
	 * does not include the mountpoint, so we prepend it ourselves.
	 */
	if (strcmp("./", m->mountpoint) != 0) {
		char buf[PATH_MAX];
		int n;

		/*
		 * Build the new path in a scratch buffer so that link->name is
		 * left intact if the result does not fit. The first two
		 * characters of link->name are skipped (presumably the leading
		 * "./" -- confirm against fill_fdlink).
		 */
		n = snprintf(buf, sizeof(buf), "%s/%s", m->mountpoint, link->name + 2);
		if (n < 0 || n >= PATH_MAX) {
			pr_err("Not enough space to replace %s\n", link->name);
			return -1;
		}

		memcpy(link->name, buf, (size_t)n + 1);
		/*
		 * fill_fdlink computed link->len for the old name; keep it in
		 * step with the rewritten one (presumably the string length of
		 * link->name -- confirm against fill_fdlink).
		 */
		link->len = n;
	}

	p->mnt_id = m->mnt_id;

	return 0;
}
/* /*
* The gen_id thing is used to optimize the comparison of shared files. * The gen_id thing is used to optimize the comparison of shared files.
* If two files have different gen_ids, then they are different for sure. * If two files have different gen_ids, then they are different for sure.
...@@ -206,6 +257,10 @@ int fill_fdlink(int lfd, const struct fd_parms *p, struct fd_link *link) ...@@ -206,6 +257,10 @@ int fill_fdlink(int lfd, const struct fd_parms *p, struct fd_link *link)
} }
link->len = len + 1; link->len = len + 1;
if (opts.overlayfs)
if (fixup_overlayfs((struct fd_parms *)p, link) < 0)
return -1;
return 0; return 0;
} }
......
...@@ -79,6 +79,7 @@ struct cr_options { ...@@ -79,6 +79,7 @@ struct cr_options {
bool enable_external_sharing; bool enable_external_sharing;
bool enable_external_masters; bool enable_external_masters;
bool aufs; /* auto-deteced, not via cli */ bool aufs; /* auto-deteced, not via cli */
bool overlayfs;
}; };
extern struct cr_options opts; extern struct cr_options opts;
......
...@@ -22,6 +22,8 @@ extern int prepare_mnt_ns(void); ...@@ -22,6 +22,8 @@ extern int prepare_mnt_ns(void);
extern int pivot_root(const char *new_root, const char *put_old); extern int pivot_root(const char *new_root, const char *put_old);
struct mount_info; struct mount_info;
struct mount_info *lookup_overlayfs(char *rpath, unsigned int s_dev,
unsigned int st_ino, unsigned int mnt_id);
extern struct mount_info *lookup_mnt_id(unsigned int id); extern struct mount_info *lookup_mnt_id(unsigned int id);
extern struct mount_info *lookup_mnt_sdev(unsigned int s_dev); extern struct mount_info *lookup_mnt_sdev(unsigned int s_dev);
......
...@@ -242,6 +242,9 @@ int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups); ...@@ -242,6 +242,9 @@ int parse_cgroups(struct list_head *cgroups, unsigned int *n_cgroups);
/* callback for AUFS support */ /* callback for AUFS support */
extern int aufs_parse(struct mount_info *mi); extern int aufs_parse(struct mount_info *mi);
/* callback for OverlayFS support */
extern int overlayfs_parse(struct mount_info *mi);
int parse_children(pid_t pid, pid_t **_c, int *_n); int parse_children(pid_t pid, pid_t **_c, int *_n);
#endif /* __CR_PROC_PARSE_H__ */ #endif /* __CR_PROC_PARSE_H__ */
...@@ -127,6 +127,89 @@ static inline int fsroot_mounted(struct mount_info *mi) ...@@ -127,6 +127,89 @@ static inline int fsroot_mounted(struct mount_info *mi)
return is_root(mi->root); return is_root(mi->root);
} }
/*
 * Goes through all entries in the mountinfo table
 * looking for a mount point that contains the file specified
 * in rpath. Uses the device number st_dev and the inode number st_ino
 * to make sure the file is correct.
 *
 * @list:   head of the mount_info list to scan.
 * @rpath:  path of the file, appended to each OverlayFS mountpoint.
 * @st_dev: device number of the file, as passed by the caller.
 * @st_ino: inode number of the file, as passed by the caller.
 * @mnt_id: currently unused here.
 *
 * Returns the matching OverlayFS entry (the last one in list order if
 * several match), NULL if none matches, or ERR_PTR(-1) if the mntns root
 * fd cannot be obtained or a candidate path does not fit in PATH_MAX.
 */
static struct mount_info *__lookup_overlayfs(struct mount_info *list, char *rpath,
					     unsigned int st_dev, unsigned int st_ino,
					     unsigned int mnt_id)
{
	struct mount_info *mi_ret = NULL;
	struct mount_info *m;
	int mntns_root = -1;

	for (m = list; m != NULL; m = m->next) {
		struct stat f_stat;
		int ret_stat;

		if (m->fstype->code != FSTYPE__OVERLAYFS)
			continue;

		/*
		 * We need the mntns root fd of the process to be dumped,
		 * to make sure we stat the correct file. It is fetched
		 * lazily, only once an OverlayFS entry is actually seen.
		 */
		if (mntns_root == -1) {
			mntns_root = __mntns_get_root_fd(root_item->pid.real);
			if (mntns_root < 0) {
				pr_err("Unable to get the root file descriptor of pid %d\n", root_item->pid.real);
				return ERR_PTR(-1);
			}
		}

		/* Concatenates m->mountpoint with rpath and attempts to stat the resulting path */
		if (strcmp("./", m->mountpoint) == 0) {
			ret_stat = fstatat(mntns_root, rpath, &f_stat, 0);
		} else {
			char full_path[PATH_MAX];
			int n = snprintf(full_path, sizeof(full_path), "%s/%s", m->mountpoint, rpath);

			if (n < 0 || n >= PATH_MAX) {
				pr_err("Not enough space to concatenate %s and %s\n", m->mountpoint, rpath);
				return ERR_PTR(-1);
			}
			ret_stat = fstatat(mntns_root, full_path, &f_stat, 0);
		}

		/*
		 * st_dev and st_ino arrive here already narrowed to unsigned
		 * int, while the struct stat fields may be wider. Narrow the
		 * stat() side the same way, otherwise a file whose inode or
		 * device number does not fit in 32 bits could never match.
		 */
		if (ret_stat == 0 &&
		    st_dev == (unsigned int)f_stat.st_dev &&
		    st_ino == (unsigned int)f_stat.st_ino)
			mi_ret = m;
	}

	return mi_ret;
}
/*
 * Finds the OverlayFS mount that really contains an open file.
 *
 * Needed to work around the OverlayFS bug in Linux kernels older than
 * 4.2; see fixup_overlayfs for how the result is used.
 *
 * The lookup runs in two stages:
 *
 *  1. If some mountinfo entry already carries both the given mnt_id and
 *     the given device number, the caller's data is consistent and NULL
 *     is returned: nothing has to be fixed up.
 *
 *  2. Otherwise the search is handed to __lookup_overlayfs(), which tries
 *     every OverlayFS mountpoint joined with rpath and compares the
 *     device and inode numbers of what it finds there.
 *
 * Returns whatever __lookup_overlayfs() returns in stage 2, or NULL when
 * stage 1 finds a consistent entry.
 *
 * NOTE(review): confirm that the caller's st_dev and mount_info::s_dev use
 * the same device-number encoding; otherwise stage 1 can never short-circuit.
 */
struct mount_info *lookup_overlayfs(char *rpath, unsigned int st_dev,
				    unsigned int st_ino, unsigned int mnt_id)
{
	struct mount_info *cur = mntinfo;

	/* Stage 1: an entry matching both ids means no fixup is required. */
	while (cur != NULL) {
		if (cur->mnt_id == mnt_id && cur->s_dev == st_dev)
			return NULL;
		cur = cur->next;
	}

	/* Stage 2: probe the OverlayFS mountpoints. */
	return __lookup_overlayfs(mntinfo, rpath, st_dev, st_ino, mnt_id);
}
static struct mount_info *__lookup_mnt_id(struct mount_info *list, int id) static struct mount_info *__lookup_mnt_id(struct mount_info *list, int id)
{ {
struct mount_info *m; struct mount_info *m;
...@@ -1365,6 +1448,10 @@ static struct fstype fstypes[32] = { ...@@ -1365,6 +1448,10 @@ static struct fstype fstypes[32] = {
.code = FSTYPE__FUSE, .code = FSTYPE__FUSE,
.dump = always_fail, .dump = always_fail,
.restore = always_fail, .restore = always_fail,
}, {
.name = "overlay",
.code = FSTYPE__OVERLAYFS,
.parse = overlayfs_parse,
}, },
}; };
......
...@@ -2057,6 +2057,19 @@ out: ...@@ -2057,6 +2057,19 @@ out:
return exit_code; return exit_code;
} }
/*
 * Parse callback for OverlayFS entries of the mountinfo table.
 *
 * Seeing at least one OverlayFS mountpoint switches on opts.overlayfs,
 * which in turn enables the workaround for the OverlayFS kernel bug
 * (see fixup_overlayfs for the details of that workaround).
 *
 * The mount entry itself is not inspected or modified. Always returns 0.
 */
int overlayfs_parse(struct mount_info *new)
{
	/* Only the existence of the entry matters, not its contents. */
	(void)new;

	opts.overlayfs = true;

	return 0;
}
/* /*
* AUFS callback function to "fix up" the root pathname. * AUFS callback function to "fix up" the root pathname.
* See sysfs_parse.c for details. * See sysfs_parse.c for details.
......
...@@ -18,6 +18,7 @@ enum fstype { ...@@ -18,6 +18,7 @@ enum fstype {
MQUEUE = 14; MQUEUE = 14;
FUSE = 15; FUSE = 15;
AUTO = 16; AUTO = 16;
OVERLAYFS = 17;
}; };
message mnt_entry { message mnt_entry {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment