kernel: add minimal TCP state tracking to flow offload support

Fixes issues with connections hanging after >30 seconds idle time

Signed-off-by: Felix Fietkau <nbd@nbd.name>

From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:37:27 +0100
Subject: [PATCH] netfilter: nf_flow_table: make flow_offload_dead inline

It is too trivial to keep as a separate exported function

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -103,7 +103,10 @@ void nf_flow_table_cleanup(struct net *n
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
-void flow_offload_dead(struct flow_offload *flow);
+static inline void flow_offload_dead(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_DYING;
+}
int nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -113,12 +113,6 @@ void flow_offload_free(struct flow_offlo
}
EXPORT_SYMBOL_GPL(flow_offload_free);
-void flow_offload_dead(struct flow_offload *flow)
-{
- flow->flags |= FLOW_OFFLOAD_DYING;
-}
-EXPORT_SYMBOL_GPL(flow_offload_dead);
-
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple *tuple = data;
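
Since the helper is now a one-line static inline, callers keep the same name and semantics but no longer depend on an exported symbol. A hypothetical caller, for illustration only:

#include <net/netfilter/nf_flow_table.h>

/* Hypothetical caller: marking a flow dead is now a single inlined
 * flag write; the flow table garbage collector reaps the entry on
 * its next pass. */
static void example_kill_flow(struct flow_offload *flow)
{
	flow_offload_dead(flow);	/* sets FLOW_OFFLOAD_DYING */
}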

From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:38:31 +0100
Subject: [PATCH] netfilter: nf_flow_table: add a new flow state for
 tearing down offloading

Will be used to tear down the offload entry while keeping the conntrack
entry alive.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -68,6 +68,7 @@ struct flow_offload_tuple_rhash {
#define FLOW_OFFLOAD_SNAT 0x1
#define FLOW_OFFLOAD_DNAT 0x2
#define FLOW_OFFLOAD_DYING 0x4
+#define FLOW_OFFLOAD_TEARDOWN 0x8
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -108,6 +109,11 @@ static inline void flow_offload_dead(str
flow->flags |= FLOW_OFFLOAD_DYING;
}
+static inline void flow_offload_teardown(struct flow_offload *flow)
+{
+ flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+}
+
int nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,
u8 protocol, enum flow_offload_tuple_dir dir);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -226,11 +226,6 @@ static inline bool nf_flow_has_expired(c
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
-{
- return flow->flags & FLOW_OFFLOAD_DYING;
-}
-
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
@@ -258,7 +253,8 @@ static int nf_flow_offload_gc_step(struc
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
if (nf_flow_has_expired(flow) ||
- nf_flow_is_dying(flow))
+ (flow->flags & (FLOW_OFFLOAD_DYING |
+ FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
out:
@@ -419,10 +415,14 @@ static void nf_flow_table_do_cleanup(str
{
struct net_device *dev = data;
- if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+ if (!dev) {
+ flow_offload_teardown(flow);
return;
+ }
- flow_offload_dead(flow);
+ if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+ flow->tuplehash[1].tuple.iifidx == dev->ifindex)
+ flow_offload_dead(flow);
}
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
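
The new flag deliberately sits next to FLOW_OFFLOAD_DYING but with weaker semantics: a dying flow takes its conntrack entry down with it, while a torn-down flow is meant to hand the connection back to the slow path. A sketch of the intended split, with hypothetical caller names; the difference between the two flags is only acted upon in the fourth patch below:

#include <net/netfilter/nf_flow_table.h>

/* Hypothetical: the device is going away, so neither the flow nor its
 * conntrack entry can usefully survive. */
static void example_on_device_unregister(struct flow_offload *flow)
{
	flow_offload_dead(flow);		/* FLOW_OFFLOAD_DYING */
}

/* Hypothetical: stop offloading but keep the connection alive, e.g.
 * after a TCP FIN/RST was seen on the fast path. */
static void example_return_to_slow_path(struct flow_offload *flow)
{
	flow_offload_teardown(flow);		/* FLOW_OFFLOAD_TEARDOWN */
}

Either flag makes nf_flow_offload_gc_step() remove the entry on its next pass.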

From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:39:56 +0100
Subject: [PATCH] netfilter: nf_flow_table: in flow_offload_lookup, skip
 entries being deleted

Preparation for sending flows back to the slow path

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -178,8 +178,21 @@ struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple)
{
- return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
- nf_flow_offload_rhash_params);
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct flow_offload *flow;
+ int dir;
+
+ tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+ nf_flow_offload_rhash_params);
+ if (!tuplehash)
+ return NULL;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+ if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
+ return NULL;
+
+ return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
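
A flagged flow can still sit in the rhashtable until the garbage collector runs, so hiding it from lookups is what actually diverts traffic. Callers already treat a NULL result as "not offloaded" and let the packet continue through the normal netfilter path; a simplified sketch of that caller pattern, modelled on nf_flow_offload_ip_hook():

#include <linux/netfilter.h>
#include <net/netfilter/nf_flow_table.h>

/* Sketch (simplified): how a hook reacts to the new NULL case. */
static unsigned int example_lookup_step(struct nf_flowtable *flow_table,
					struct flow_offload_tuple *tuple)
{
	struct flow_offload_tuple_rhash *tuplehash;

	tuplehash = flow_offload_lookup(flow_table, tuple);
	if (!tuplehash)
		return NF_ACCEPT;	/* no entry, or entry being torn
					 * down: take the slow path */

	/* ... fast-path forwarding would continue here ... */
	return NF_STOLEN;
}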

From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:41:11 +0100
Subject: [PATCH] netfilter: nf_flow_table: add support for sending flows
 back to the slow path

Reset the timeout. For TCP, also set the state so that the next incoming
packets are used to reset window tracking.

This allows the slow path to take over again once the offload state has
been torn down.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -100,6 +100,36 @@ err_ct_refcnt:
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+ const struct nf_conntrack_l4proto *l4proto;
+ struct net *net = nf_ct_net(ct);
+ unsigned int *timeouts;
+ unsigned int timeout;
+ int l4num;
+
+ l4num = nf_ct_protonum(ct);
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+ if (!l4proto)
+ return;
+
+ timeouts = l4proto->get_timeouts(net);
+ if (!timeouts)
+ return;
+
+ if (l4num == IPPROTO_TCP) {
+ timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+ ct->proto.tcp.state = TCP_CONNTRACK_IGNORE;
+ } else if (l4num == IPPROTO_UDP) {
+ timeout = timeouts[UDP_CT_REPLIED];
+ } else {
+ return;
+ }
+
+ ct->timeout = nfct_time_stamp + timeout;
+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+}
+
void flow_offload_free(struct flow_offload *flow)
{
struct flow_offload_entry *e;
@@ -107,7 +137,10 @@ void flow_offload_free(struct flow_offlo
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
e = container_of(flow, struct flow_offload_entry, flow);
- nf_ct_delete(e->ct, 0, 0);
+ if (flow->flags & FLOW_OFFLOAD_DYING)
+ nf_ct_delete(e->ct, 0, 0);
+ else
+ flow_offload_fixup_ct_state(e->ct);
nf_ct_put(e->ct);
kfree_rcu(e, rcu_head);
}
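
The fixup hands the connection back to conntrack as if it had just seen traffic: a fresh established (TCP) or replied (UDP) timeout counted from now, plus the TCP_CONNTRACK_IGNORE state so window tracking is re-seeded from the next packets instead of rejecting them as out of window. Note that ct->timeout is an absolute deadline in jiffies-derived time; expiry tests use the same wrap-safe signed comparison as nf_flow_has_expired() in the second patch, roughly:

#include <net/netfilter/nf_conntrack.h>

/* Wrap-safe deadline test; this mirrors nf_ct_is_expired() from
 * <net/netfilter/nf_conntrack.h>. */
static inline bool example_ct_expired(const struct nf_conn *ct)
{
	return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
}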

From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:42:58 +0100
Subject: [PATCH] netfilter: nf_flow_table: tear down TCP flows if RST or
 FIN was seen

Allow the slow path to handle the shutdown of the connection with proper
timeouts

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,23 @@
#include <linux/tcp.h>
#include <linux/udp.h>
+static int nf_flow_tcp_state_check(struct flow_offload *flow,
+ struct sk_buff *skb, unsigned int thoff)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ if (unlikely(tcph->fin || tcph->rst)) {
+ flow_offload_teardown(flow);
+ return -1;
+ }
+
+ return 0;
+}
+
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
@@ -118,10 +135,9 @@ static int nf_flow_dnat_ip(const struct
}
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
- enum flow_offload_tuple_dir dir)
+ unsigned int thoff, enum flow_offload_tuple_dir dir)
{
struct iphdr *iph = ip_hdr(skb);
- unsigned int thoff = iph->ihl * 4;
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
@@ -201,6 +217,7 @@ nf_flow_offload_ip_hook(void *priv, stru
struct flow_offload *flow;
struct net_device *outdev;
const struct rtable *rt;
+ unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
@@ -229,8 +246,12 @@ nf_flow_offload_ip_hook(void *priv, stru
if (skb_try_make_writable(skb, sizeof(*iph)))
return NF_DROP;
+ thoff = ip_hdr(skb)->ihl * 4;
+ if (nf_flow_tcp_state_check(flow, skb, thoff))
+ return NF_ACCEPT;
+
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
- nf_flow_nat_ip(flow, skb, dir) < 0)
+ nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
@@ -438,6 +459,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
+ if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
+ return NF_ACCEPT;
+
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
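
Taken together, the series gives an offloaded TCP flow a clean shutdown sequence once a FIN or RST shows up. In outline (summarising the patches above, not additional kernel code):

/*
 * 1. nf_flow_tcp_state_check() sees FIN/RST, calls
 *    flow_offload_teardown() and returns nonzero; the hook returns
 *    NF_ACCEPT, so this packet already travels the slow path.
 * 2. flow_offload_lookup() now returns NULL for this flow, so all
 *    following packets take the slow path as well.
 * 3. nf_flow_offload_gc_step() notices FLOW_OFFLOAD_TEARDOWN and
 *    calls flow_offload_del().
 * 4. flow_offload_free() sees TEARDOWN rather than DYING and calls
 *    flow_offload_fixup_ct_state() instead of nf_ct_delete(): the
 *    conntrack entry survives with fresh timeouts and the slow path
 *    finishes the connection shutdown.
 */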