Commit afde3659 authored by Andrew Vagin's avatar Andrew Vagin Committed by Pavel Emelyanov

net: set the IP_CT_TCP_FLAG_BE_LIBERAL flag for conntracks

Currently the kernel doesn't report sequence numbers for conntracks
and tries to restore them from the first packets.
When we are restoring a tcp connection, we send a window probe and
set seq - 1 in it to get an ack immediately.

        /* Use a previous sequence.  This should cause the other
         * end to send an ack.  Don't queue or clone SKB, just
         * send it.
         */
        tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);

But conntrack doesn't like this, because we then get an ack which is greater than seq.
It looks as if we were trying to ack data which we haven't received yet.

[  735.528073] td_maxwin == 0
               seq=1081132048 ack=2965916432+(0) sack=2965916432+(0) win=342 end=1081132048
               tcp_in_window: sender end=0 maxend=0 maxwin=0 scale=0 receiver end=0 maxend=0 maxwin=0 scale=0
[  735.533409] log_invalid:
               seq=2965916431 ack=1081132049+(0) sack=1081132049+(0) win=342 end=2965916431
               tcp_in_window: sender end=2965916431 maxend=2965916773 maxwin=342 scale=0 receiver end=1081132048 maxend=1081132390 maxwin=342 scale=0
[  735.537651] nf_ct_tcp: ACK is over the upper bound (ACKed data not seen yet)

The kernel sets IP_CT_TCP_FLAG_BE_LIBERAL for new conntracks,
if we are in the middle of a connection.
Signed-off-by: Andrew Vagin <avagin@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
parent 0dcfcc0e
......@@ -141,12 +141,12 @@ ifneq ($(PIEGEN),no)
endif
endif
cflags-y += -iquote include -iquote pie -iquote .
cflags-y += -iquote include -iquote pie -iquote . -I/usr/include/libnl3
cflags-y += -iquote $(ARCH_DIR) -iquote $(ARCH_DIR)/include
cflags-y += -fno-strict-aliasing
export cflags-y
LIBS := -lrt -lpthread -lprotobuf-c -ldl
LIBS := -lrt -lpthread -lprotobuf-c -ldl -lnl-3
DEFINES += -D_FILE_OFFSET_BITS=64
DEFINES += -D_GNU_SOURCE
......@@ -161,7 +161,7 @@ ifeq ($(DEBUG),1)
DEFINES += -DCR_DEBUG
CFLAGS += -O0 -ggdb3
else
CFLAGS += -O2
CFLAGS += -O2 -g
endif
ifeq ($(GMON),1)
......
......@@ -12,6 +12,7 @@
#include <sys/mount.h>
#include <net/if.h>
#include <linux/sockios.h>
#include <libnl3/netlink/msg.h>
#include "imgset.h"
#include "namespaces.h"
......@@ -359,6 +360,54 @@ static int dump_one_nf(struct nlmsghdr *hdr, void *arg)
return 0;
}
static int ct_restore_callback(struct nlmsghdr *nlh)
{
struct nfgenmsg *msg;
struct nlattr *tb[CTA_MAX+1], *tbp[CTA_PROTOINFO_MAX + 1], *tb_tcp[CTA_PROTOINFO_TCP_MAX+1];
int err;
msg = NLMSG_DATA(nlh);
if (msg->nfgen_family != AF_INET && msg->nfgen_family != AF_INET6)
return 0;
err = nlmsg_parse(nlh, sizeof(struct nfgenmsg), tb, CTA_MAX, NULL);
if (err < 0)
return -1;
if (!tb[CTA_PROTOINFO])
return 0;
err = nla_parse_nested(tbp, CTA_PROTOINFO_MAX, tb[CTA_PROTOINFO], NULL);
if (err < 0)
return -1;
if (!tbp[CTA_PROTOINFO_TCP])
return 0;
err = nla_parse_nested(tb_tcp, CTA_PROTOINFO_TCP_MAX, tbp[CTA_PROTOINFO_TCP], NULL);
if (err < 0)
return -1;
if (tb_tcp[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
struct nf_ct_tcp_flags *flags;
flags = nla_data(tb_tcp[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
flags->flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
flags->mask |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
if (tb_tcp[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
struct nf_ct_tcp_flags *flags;
flags = nla_data(tb_tcp[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
flags->flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
flags->mask |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
return 0;
}
static int restore_nf_ct(int pid, int type)
{
struct nlmsghdr *nlh = NULL;
......@@ -404,6 +453,10 @@ static int restore_nf_ct(int pid, int type)
goto out;
}
if (type == CR_FD_NETNF_CT)
if (ct_restore_callback(nlh))
goto out;
nlh->nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK|NLM_F_CREATE;
ret = do_rtnl_req(sk, nlh, nlh->nlmsg_len, NULL, NULL, NULL);
if (ret)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment