Commit 0ddfaa9b authored by Fyodor Bocharov, committed by Pavel Emelyanov

shmem: implement PME derived pages state tracking

Anon shmem pages state tracking allows us not to dump unused
anon shmem pages at all.

To track the state of anon shmem pages we create a bitmap.
Each 2 bits in this bitmap correspond to particular page.
Each 2 bits store one page state:
PST_DONT_DUMP, PST_DUMP, PST_ZERO, PST_DIRTY.
This number of states is enough to decide what to do with the page
on dump.

With anon shmem there is a peculiarity. To decide what state a page has
we need to examine its PME bits in all the processes that share it.
So a page state derived from PME bits in one process may be overridden
by a page state derived from PME bits in another process.
See the implementation of these overrides in the patch.
Signed-off-by: Fyodor Bocharov <fbocharov@yandex.ru>
Signed-off-by: Eugene Batalov <eabatalov89@gmail.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 531c4aa4
......@@ -16,6 +16,7 @@
#include "mem.h"
#include "config.h"
#include "syscall-codes.h"
#include "asm/bitops.h"
#include "protobuf.h"
#include "images/pagemap.pb-c.h"
......@@ -81,6 +82,7 @@ struct shmem_info {
struct { /* For dump */
unsigned long start;
unsigned long end;
unsigned long *pstate_map;
};
};
};
......@@ -120,6 +122,71 @@ static struct shmem_info *shmem_find(unsigned long shmid)
return NULL;
}
/*
 * Page state values kept in a segment's pstate bitmap. Every page
 * occupies PST_BITS consecutive bits, so each value fits in two bits.
 */
#define PST_DONT_DUMP 0
#define PST_DUMP 1
#define PST_ZERO 2
#define PST_DIRTY 3
#define PST_BITS 2
/* Bitmap indices of the low and high state bit of page number pfn. */
#define PST_BIT0_IX(pfn) ((pfn) * PST_BITS)
#define PST_BIT1_IX(pfn) (PST_BIT0_IX(pfn) + 1)
/*
 * Read the two-bit state recorded for page @pfn in @pstate_map.
 *
 * Returns one of the PST_* values, assembled from the page's low bit
 * (PST_BIT0_IX) and high bit (PST_BIT1_IX).
 */
static unsigned int get_pstate(unsigned long *pstate_map, unsigned long pfn)
{
	unsigned int state = 0;

	if (test_bit(PST_BIT0_IX(pfn), pstate_map))
		state |= 1;
	if (test_bit(PST_BIT1_IX(pfn), pstate_map))
		state |= 2;

	return state;
}
static void set_pstate(unsigned long *pstate_map, unsigned long pfn,
unsigned int pstate)
{
if (pstate & 1)
set_bit(PST_BIT0_IX(pfn), pstate_map);
if (pstate & 2)
set_bit(PST_BIT1_IX(pfn), pstate_map);
}
/*
 * Grow the page-state bitmap of @si so that it covers @new_size bytes
 * and record @new_size as the segment size.
 *
 * The part of the bitmap that did not exist before is zero-filled, so
 * newly covered pages start out as PST_DONT_DUMP. The "old" bitmap
 * length is derived from si->size, hence si->size must still hold the
 * previous size when this is called. Shrinking is a bug.
 *
 * Returns 0 on success. Returns -1 if reallocation fails; in that case
 * @si is left untouched, i.e. the old bitmap and size stay valid.
 */
static int expand_shmem(struct shmem_info *si, unsigned long new_size)
{
	unsigned long nr_pages, nr_map_items, map_size,
		      nr_new_map_items, new_map_size;
	unsigned long *new_map;

	nr_pages = DIV_ROUND_UP(si->size, PAGE_SIZE);
	nr_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
	map_size = nr_map_items * sizeof(*si->pstate_map);

	nr_pages = DIV_ROUND_UP(new_size, PAGE_SIZE);
	nr_new_map_items = BITS_TO_LONGS(nr_pages * PST_BITS);
	new_map_size = nr_new_map_items * sizeof(*si->pstate_map);

	BUG_ON(new_map_size < map_size);

	/*
	 * Go through a temporary: assigning the result straight to
	 * si->pstate_map would drop the only reference to the old
	 * bitmap if the reallocation fails.
	 */
	new_map = xrealloc(si->pstate_map, new_map_size);
	if (!new_map)
		return -1;
	si->pstate_map = new_map;

	/* Only the freshly added tail needs clearing. */
	memzero(si->pstate_map + nr_map_items, new_map_size - map_size);
	si->size = new_size;
	return 0;
}
/*
 * Fold the pagemap entries of one mapping into the segment's page-state
 * bitmap.
 *
 * @si:  shmem segment whose pstate_map is updated
 * @map: pagemap entries for @vma, indexed by page number within @vma
 *       (presumably one entry per page of @vma -- confirm with caller)
 * @vma: the mapping being examined; vma->pgoff is its byte offset into
 *       the segment
 *
 * Pages that should_dump_page() rejects are left alone. The others are
 * marked PST_DIRTY when PME_SOFT_DIRTY is set in their entry and
 * PST_DUMP otherwise.
 */
static void update_shmem_pmaps(struct shmem_info *si, u64 *map, VmaEntry *vma)
{
	unsigned long shmem_pfn, vma_pfn, vma_pgcnt, map_pgcnt, first_pfn;

	/* Nothing of this mapping lies inside the tracked segment. */
	if (vma->pgoff >= si->size)
		return;

	/*
	 * The segment may already be larger than this mapping (another
	 * process registered a bigger area). Never walk past the pages
	 * the mapping itself has, otherwise map[] is read out of bounds.
	 */
	vma_pgcnt = DIV_ROUND_UP(si->size - vma->pgoff, PAGE_SIZE);
	map_pgcnt = DIV_ROUND_UP(vma->end - vma->start, PAGE_SIZE);
	if (vma_pgcnt > map_pgcnt)
		vma_pgcnt = map_pgcnt;

	first_pfn = DIV_ROUND_UP(vma->pgoff, PAGE_SIZE);

	for (vma_pfn = 0; vma_pfn < vma_pgcnt; ++vma_pfn) {
		if (!should_dump_page(vma, map[vma_pfn]))
			continue;

		shmem_pfn = first_pfn + vma_pfn;
		if (map[vma_pfn] & PME_SOFT_DIRTY)
			set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY);
		else
			set_pstate(si->pstate_map, shmem_pfn, PST_DUMP);
	}
}
int collect_sysv_shmem(unsigned long shmid, unsigned long size)
{
......@@ -489,26 +556,32 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
{
	/*
	 * Register the shmem mapping @vma of task @pid and merge its
	 * pagemap entries @map into the page-state bitmap of the segment
	 * identified by vma->shmid.
	 *
	 * Returns 0 on success, -1 on allocation failure.
	 */
	struct shmem_info *si;
	unsigned long size = vma->pgoff + (vma->end - vma->start);

	si = shmem_find(vma->shmid);
	if (si) {
		/*
		 * Known segment: grow the bitmap first if this mapping
		 * reaches further than anything seen so far.
		 * expand_shmem() updates si->size itself.
		 */
		if (si->size < size) {
			if (expand_shmem(si, size))
				return -1;
		}
		update_shmem_pmaps(si, map, vma);
		return 0;
	}

	si = xzalloc(sizeof(*si));
	if (!si)
		return -1;

	/*
	 * si->size is deliberately left at the zero set by xzalloc():
	 * expand_shmem() treats it as the old size and zero-fills the
	 * whole newly allocated bitmap before storing the new size.
	 */
	si->pid = pid;
	si->start = vma->start;
	si->end = vma->end;
	si->shmid = vma->shmid;
	shmem_hash_add(si);

	if (expand_shmem(si, size))
		return -1;
	update_shmem_pmaps(si, map, vma);
	return 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment