kernel: fix conntrack leak for flow_offload connections

This was caused by a race condition between offload teardown and
conntrack gc bumping the timeout of offloaded connections

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
Felix Fietkau 2018-06-13 12:46:54 +02:00
parent e820734f79
commit 68ab89854f
3 changed files with 123 additions and 28 deletions

View file

@ -0,0 +1,110 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 13 Jun 2018 12:33:39 +0200
Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout
corner case
The full teardown of offloaded flows is deferred to a gc work item,
however processing of packets by netfilter needs to happen immediately
after a teardown is requested, because the conntrack state needs to be
fixed up.
Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete,
the netfilter conntrack gc can accidentally bump the timeout of a
connection where offload was just stopped, causing a conntrack entry
leak.
Fix this by moving the conntrack timeout bumping from conntrack core to
the nf_flow_offload and add a check to prevent bogus timeout bumps.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -978,18 +978,6 @@ static bool gc_worker_can_early_drop(con
return false;
}
-#define DAY (86400 * HZ)
-
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static void nf_ct_offload_timeout(struct nf_conn *ct)
-{
- if (nf_ct_expires(ct) < DAY / 2)
- ct->timeout = nfct_time_stamp + DAY;
-}
-
static void gc_worker(struct work_struct *work)
{
unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1026,10 +1014,8 @@ static void gc_worker(struct work_struct
tmp = nf_ct_tuplehash_to_ctrack(h);
scanned++;
- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
- nf_ct_offload_timeout(tmp);
+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
continue;
- }
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -185,8 +185,27 @@ static const struct rhashtable_params nf
.automatic_shrinking = true,
};
+#define DAY (86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static void nf_ct_offload_timeout(struct flow_offload *flow)
+{
+ struct flow_offload_entry *entry;
+ struct nf_conn *ct;
+
+ entry = container_of(flow, struct flow_offload_entry, flow);
+ ct = entry->ct;
+
+ if (nf_ct_expires(ct) < DAY / 2)
+ ct->timeout = nfct_time_stamp + DAY;
+}
+
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
+ nf_ct_offload_timeout(flow);
flow->timeout = (u32)jiffies;
rhashtable_insert_fast(&flow_table->rhashtable,
@@ -307,6 +326,8 @@ static int nf_flow_offload_gc_step(struc
rhashtable_walk_start(&hti);
while ((tuplehash = rhashtable_walk_next(&hti))) {
+ bool teardown;
+
if (IS_ERR(tuplehash)) {
err = PTR_ERR(tuplehash);
if (err != -EAGAIN)
@@ -319,9 +340,13 @@ static int nf_flow_offload_gc_step(struc
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
- if (nf_flow_has_expired(flow) ||
- (flow->flags & (FLOW_OFFLOAD_DYING |
- FLOW_OFFLOAD_TEARDOWN)))
+ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+ FLOW_OFFLOAD_TEARDOWN);
+
+ if (!teardown)
+ nf_ct_offload_timeout(flow);
+
+ if (nf_flow_has_expired(flow) || teardown)
flow_offload_del(flow_table, flow);
}
out:

View file

@ -156,7 +156,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
--- a/net/netfilter/nf_flow_table_core.c --- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c
@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable @@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable
} }
EXPORT_SYMBOL_GPL(flow_offload_add); EXPORT_SYMBOL_GPL(flow_offload_add);
@ -173,7 +173,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
rhashtable_remove_fast(&flow_table->rhashtable, rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f @@ -233,6 +239,9 @@ static void flow_offload_del(struct nf_f
e = container_of(flow, struct flow_offload_entry, flow); e = container_of(flow, struct flow_offload_entry, flow);
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
@ -183,32 +183,17 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
flow_offload_free(flow); flow_offload_free(flow);
} }
@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc @@ -346,6 +355,9 @@ static int nf_flow_offload_gc_step(struc
rhashtable_walk_start(&hti); if (!teardown)
nf_ct_offload_timeout(flow);
while ((tuplehash = rhashtable_walk_next(&hti))) {
+ bool teardown;
if (IS_ERR(tuplehash)) {
err = PTR_ERR(tuplehash);
if (err != -EAGAIN)
@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
- if (nf_flow_has_expired(flow) ||
- (flow->flags & (FLOW_OFFLOAD_DYING |
- FLOW_OFFLOAD_TEARDOWN)))
+ teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+ FLOW_OFFLOAD_TEARDOWN);
+
+ if (nf_flow_in_hw(flow) && !teardown) + if (nf_flow_in_hw(flow) && !teardown)
+ continue; + continue;
+ +
+ if (nf_flow_has_expired(flow) || teardown) if (nf_flow_has_expired(flow) || teardown)
flow_offload_del(flow_table, flow); flow_offload_del(flow_table, flow);
} }
out: @@ -481,10 +493,43 @@ int nf_flow_dnat_port(const struct flow_
@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_
} }
EXPORT_SYMBOL_GPL(nf_flow_dnat_port); EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
@ -252,7 +237,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
err = rhashtable_init(&flowtable->rhashtable, err = rhashtable_init(&flowtable->rhashtable,
@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu @@ -522,6 +567,8 @@ static void nf_flow_table_iterate_cleanu
{ {
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work); flush_delayed_work(&flowtable->gc_work);
@ -261,7 +246,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
} }
void nf_flow_table_cleanup(struct net *net, struct net_device *dev) void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n @@ -535,6 +582,26 @@ void nf_flow_table_cleanup(struct net *n
} }
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
@ -288,7 +273,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
void nf_flow_table_free(struct nf_flowtable *flow_table) void nf_flow_table_free(struct nf_flowtable *flow_table)
{ {
mutex_lock(&flowtable_lock); mutex_lock(&flowtable_lock);
@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta @@ -544,9 +611,58 @@ void nf_flow_table_free(struct nf_flowta
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table)); WARN_ON(!nf_flow_offload_gc_step(flow_table));
rhashtable_destroy(&flow_table->rhashtable); rhashtable_destroy(&flow_table->rhashtable);

View file

@ -26,9 +26,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
--- a/net/netfilter/nf_flow_table_core.c --- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c
@@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc @@ -355,7 +355,7 @@ static int nf_flow_offload_gc_step(struc
teardown = flow->flags & (FLOW_OFFLOAD_DYING | if (!teardown)
FLOW_OFFLOAD_TEARDOWN); nf_ct_offload_timeout(flow);
- if (nf_flow_in_hw(flow) && !teardown) - if (nf_flow_in_hw(flow) && !teardown)
+ if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown) + if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown)