Commit 53771adc authored by Pavel Emelyanov's avatar Pavel Emelyanov

bfd: File-descriptors based buffered read

This sounds strange, but we kinda need one. Here's the
justification for that.

We heavily open /proc/pid/foo files. To speed things up we
do pid_dir = open("/proc/pid") then openat(pid_dir, foo).
This really saves time on big trees, up to 10%.

Sometimes we need line-by-line scan of these files, and for
that we currently use the fdopen() call. It takes a file
descriptor (obtained with openat from above) and wraps one
into a FILE*.

The problem with the latter is that fdopen _always_ mmap()s
a buffer for reads and this buffer always (!) gets unmapped
back on fclose(). This pair of mmap() + munmap() eats time
on big trees, up to 10% in my experiments with p.haul tests.

The situation is made even worse by the fact that each fgets
on the file results in a new page allocated in the kernel
(since the mapping is new). And also this fgets copies data,
which is not big deal, but for e.g. smaps file this results
in ~8K bytes being just copied around.

Having said that, here's a small but fast way of reading a
descriptor line-by-line using big buffer for reducing the
amount of read()s.

After all per-task fopen_proc()-s get reworked on this engine
(next 4 patches) the results on p.haul test would be

        Syscall     Calls      Time (% of time)
Now:
           mmap:      463  0.012033 (3.2%)
         munmap:      447  0.014473 (3.9%)
Patched:
         munmap:       57  0.002106 (0.6%)
           mmap:       74  0.002286 (0.7%)

The amount of read()s and open()s doesn't change since FILE*
also uses page-sized buffer for reading.

Also this eliminates some amount of lseek()s and fstat()s
the fdopen() does every time to catch up with file position
and to determine what sort of buffering it should use (for
terminals it's \n-driven, for files it's not).
Signed-off-by: 's avatarPavel Emelyanov <xemul@parallels.com>
parent b30f0f01
...@@ -15,6 +15,7 @@ obj-y += cr-show.o ...@@ -15,6 +15,7 @@ obj-y += cr-show.o
obj-y += cr-check.o obj-y += cr-check.o
obj-y += cr-dedup.o obj-y += cr-dedup.o
obj-y += util.o obj-y += util.o
obj-y += bfd.o
obj-y += action-scripts.o obj-y += action-scripts.o
obj-y += sysctl.o obj-y += sysctl.o
obj-y += ptrace.o obj-y += ptrace.o
......
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "log.h"
#include "bfd.h"
#include "asm-generic/page.h"
/*
* Kernel doesn't produce more than one page of
* date per one read call on proc files.
*/
#define BUFSIZE (PAGE_SIZE)
/*
* XXX currently CRIU doesn't open several files
* at once, so we can have just one buffer.
*/
static char *buf;
static bool buf_busy;
static int buf_get(struct xbuf *xb)
{
if (buf_busy) {
pr_err("bfd: Buffer busy\n");
return -1;
}
if (!buf) {
buf = mmap(NULL, BUFSIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, 0, 0);
if (buf == MAP_FAILED) {
pr_perror("bfd: No buf");
return -1;
}
}
buf_busy = true;
xb->mem = buf;
xb->pos = buf;
xb->bleft = 0;
return 0;
}
static void buf_put(struct xbuf *xb)
{
buf_busy = false;
/*
* Don't unmap buffer back, it will get reused
* by next bfdopen call
*/
}
int bfdopen(struct bfd *f)
{
if (buf_get(&f->b)) {
close(f->fd);
return -1;
}
return 0;
}
void bclose(struct bfd *f)
{
buf_put(&f->b);
close(f->fd);
}
static int brefill(struct bfd *f)
{
int ret;
struct xbuf *b = &f->b;
memmove(b->mem, b->pos, b->bleft);
b->pos = b->mem;
ret = read(f->fd, b->mem + b->bleft, BUFSIZE - b->bleft);
if (ret < 0) {
pr_perror("bfd: Error reading file");
return -1;
}
if (ret == 0)
return 0;
b->bleft += ret;
return 0;
}
static char *strnchr(char *str, unsigned int len, char c)
{
while (len > 0 && *str != c) {
str++;
len--;
}
return len == 0 ? NULL : str;
}
char *breadline(struct bfd *f)
{
struct xbuf *b = &f->b;
bool refilled = false;
char *n;
unsigned int ss = 0;
again:
n = strnchr(b->pos + ss, b->bleft - ss, '\n');
if (n) {
char *ret;
ret = b->pos;
b->pos = n + 1; /* skip the \n found */
*n = '\0';
b->bleft -= (b->pos - ret);
return ret;
}
if (refilled) {
if (!b->bleft)
return NULL;
/*
* Last bytes may lack the \n at the
* end, need to report this as full
* line anyway
*/
b->pos[b->bleft] = '\0';
/*
* The b->pos still points to old data,
* but we say that no bytes left there
* so next call to breadline will not
* "find" these bytes again.
*/
b->bleft = 0;
return b->pos;
}
/* no full line in the buffer -- refill one */
if (brefill(f))
return BREADERR;
/*
* small optimization -- we've scanned b->bleft
* symols already, no need to re-scan them after
* the buffer refill.
*/
ss = b->bleft;
refilled = true;
goto again;
}
#ifndef __CR_BFD_H__
#define __CR_BFD_H__
struct xbuf {
char *mem; /* buffer */
char *pos; /* position we see bytes at */
unsigned int bleft; /* bytes left after b->pos */
};
struct bfd {
int fd;
struct xbuf b;
};
#define BREADERR ((char *)-1)
int bfdopen(struct bfd *f);
void bclose(struct bfd *f);
char *breadline(struct bfd *f);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment