Commit 5480a2a4, authored by Mike Rapoport, committed by Andrei Vagin

kerndat: add test for availability of PR_SET_THP_DISABLE prctl

The PR_SET_THP_DISABLE prctl allows control of transparent huge pages on a
per-process basis. It has been available since Linux 3.15, but until recently it
set VM_NOHUGEPAGE on all VMAs created after the prctl() call, which prevented
proper restore for a combination of pre- and post-copy. A recent change to the
prctl(PR_SET_THP_DISABLE) behavior eliminates the use of per-VMA flags, so
we can use the new version of the prctl() to disable THP.
Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
parent 9f61de87
......@@ -51,6 +51,7 @@ struct kerndat_s {
int lsm;
bool has_uffd;
unsigned long uffd_features;
bool has_thp_disable;
};
extern struct kerndat_s kdat;
......
......@@ -74,4 +74,12 @@ struct prctl_mm_map {
# define PR_GET_TID_ADDRESS 40
#endif
/*
 * Fallback values for the transparent huge page prctl() options, used only
 * when the headers included so far do not already define them.
 */
#ifndef PR_SET_THP_DISABLE
# define PR_SET_THP_DISABLE 41
#endif
#ifndef PR_GET_THP_DISABLE
# define PR_GET_THP_DISABLE 42
#endif
#endif /* __CR_PRCTL_H__ */
......@@ -12,6 +12,7 @@
#include <stdint.h>
#include <sys/socket.h>
#include <arpa/inet.h> /* for sockaddr_in and inet_ntoa() */
#include <sys/prctl.h>
#include "int.h"
#include "log.h"
......@@ -31,6 +32,7 @@
#include <compel/compel.h>
#include "netfilter.h"
#include "linux/userfaultfd.h"
#include "prctl.h"
struct kerndat_s kdat = {
};
......@@ -755,6 +757,75 @@ int kerndat_uffd(void)
return 0;
}
/*
 * Probe whether prctl(PR_SET_THP_DISABLE) can be used to disable THP
 * without tagging newly created VMAs as "no huge page".
 *
 * The probe turns PR_SET_THP_DISABLE on, maps one anonymous page, turns the
 * prctl back off and then looks up the VmFlags line of that mapping in
 * /proc/self/smaps. When the mapping is found, kdat.has_thp_disable is set
 * to true if MADV_NOHUGEPAGE is absent from its flags and to false
 * otherwise; when the mapping is not found the field is left untouched.
 *
 * Returns 0 when the probe ran to completion (this includes the case where
 * the prctl is rejected with EINVAL, i.e. not available), -1 on any other
 * failure.
 */
int kerndat_has_thp_disable(void)
{
	struct bfd f;
	void *addr;
	char *str;
	int ret = -1;
	bool vma_match = false;

	if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0)) {
		if (errno != EINVAL) {
			pr_perror("prctl PR_SET_THP_DISABLE failed");
			return -1;
		}
		pr_info("PR_SET_THP_DISABLE is not available\n");
		return 0;
	}

	/* Anonymous mappings take no file; pass the conventional fd of -1. */
	addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		pr_perror("Can't mmap memory for THP disable test");
		return -1;
	}

	/* The test mapping exists now, so every later failure must unmap it. */
	if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) {
		pr_perror("prctl PR_SET_THP_DISABLE failed");
		goto out_unmap;
	}

	f.fd = open("/proc/self/smaps", O_RDONLY);
	if (f.fd < 0) {
		pr_perror("Can't open /proc/self/smaps");
		/* Nothing was opened, so there is nothing for bclose() to do. */
		goto out_unmap;
	}

	/*
	 * NOTE(review): skips bclose() here on the assumption that bfdopenr()
	 * releases the descriptor itself when it fails and leaves the rest
	 * of 'f' unset -- confirm against the bfd implementation.
	 */
	if (bfdopenr(&f))
		goto out_unmap;

	while ((str = breadline(&f)) != NULL) {
		if (IS_ERR(str))
			goto out;

		if (is_vma_range_fmt(str)) {
			unsigned long vma_addr;

			if (sscanf(str, "%lx-", &vma_addr) != 1) {
				pr_err("Can't parse: %s\n", str);
				goto out;
			}

			/* Remember that the following lines describe our mapping. */
			if (vma_addr == (unsigned long)addr)
				vma_match = true;
		}

		if (vma_match && !strncmp(str, "VmFlags: ", 9)) {
			u32 flags = 0;
			u64 madv = 0;
			int io_pf = 0;

			/* presumably madv collects advice as (1 << MADV_*) bits -- verify */
			parse_vmflags(str, &flags, &madv, &io_pf);
			kdat.has_thp_disable = !(madv & (1ULL << MADV_NOHUGEPAGE));
			break;
		}
	}

	ret = 0;

out:
	bclose(&f);
out_unmap:
	munmap(addr, PAGE_SIZE);

	return ret;
}
int kerndat_init(void)
{
int ret;
......@@ -795,6 +866,8 @@ int kerndat_init(void)
ret = kerndat_detect_stack_guard_gap();
if (!ret)
ret = kerndat_uffd();
if (!ret)
ret = kerndat_has_thp_disable();
kerndat_lsm();
kerndat_mmap_min_addr();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment