netfilter: conntrack: add clash resolution for reverse collisions

Given existing entry:
ORIGIN: a:b -> c:d
REPLY:  c:d -> a:b

And colliding entry:
ORIGIN: c:d -> a:b
REPLY:  a:b -> c:d

The colliding ct (and the associated skb) gets dropped on insert.
Permit the packet instead, by checking whether the colliding entry
matches the existing entry with origin and reply swapped.

This happens when both ends send packets at the same time: both
requests are picked up as NEW, rather than NEW for the 'first' packet
and ESTABLISHED for the second one.
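
As an illustration (not part of the patch), a hypothetical reproducer
could keep both directions of one UDP flow transmitting on loopback,
e.g. run './udpflow 5001 5002' and './udpflow 5002 5001' and only then
enable conntrack; whether the race window is hit still depends on
timing:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	struct sockaddr_in src = { .sin_family = AF_INET },
			   dst = { .sin_family = AF_INET };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0 || argc < 3)
		return 1;

	src.sin_addr.s_addr = dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	src.sin_port = htons(atoi(argv[1]));	/* local port */
	dst.sin_port = htons(atoi(argv[2]));	/* peer's port */

	if (bind(fd, (struct sockaddr *)&src, sizeof(src)) < 0)
		return 1;

	for (;;) {
		/* both instances keep transmitting, so the flows
		 * a:b -> c:d and c:d -> a:b are in flight at once
		 */
		sendto(fd, "x", 1, 0, (struct sockaddr *)&dst, sizeof(dst));
		usleep(100000);
	}
}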

This is an esoteric condition, as the ruleset must permit NEW connections
in either direction and both peers must already have a bidirectional
traffic flow at the time conntrack gets enabled.
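
For example, a minimal ruleset along these lines (hypothetical, not
taken from the patch) permits NEW connections from either peer on the
forward path:

table inet filter {
	chain forward {
		type filter hook forward priority filter; policy drop;
		ct state new,established accept
	}
}

Without clash resolution, the packet that loses the race is dropped at
confirm time even though the rule accepted it.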

Allow the 'reverse' skb to pass and assign the existing (clashing)
entry.
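
The accept path of __nf_ct_resolve_clash then proceeds roughly as
sketched below (condensed from the surrounding kernel sources; exact
statements may differ between versions): take a reference on the
winning entry, fold the loser's accounting into it, and attach the
winner to the skb:

	if (nf_ct_can_merge(ct, loser_ct)) {
		struct net *net = nf_ct_net(ct);

		/* keep the existing entry, drop our loser ct */
		nf_conntrack_get(&ct->ct_general);
		nf_ct_acct_merge(ct, ctinfo, loser_ct);
		nf_ct_put(loser_ct);
		nf_ct_set(skb, ct, ctinfo);	/* skb now refers to the winner */

		NF_CT_STAT_INC(net, clash_resolve);
		return NF_ACCEPT;
	}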

While at it, also drop the extra 'dying' check; this is already
tested earlier by the calling function.
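
Since a successful merge bumps the clash_resolve statistic, a resolved
reverse collision is visible from userspace, assuming a conntrack-tools
build that exposes the counter:

	conntrack -S	# check the clash_resolve= field on each cpu line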

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
@@ -988,6 +988,56 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
 		tstamp->start = ktime_get_real_ns();
 }
 
+/**
+ * nf_ct_match_reverse - check if ct1 and ct2 refer to identical flow
+ * @ct1: conntrack in hash table to check against
+ * @ct2: merge candidate
+ *
+ * returns true if ct1 and ct2 happen to refer to the same flow, but
+ * in opposing directions, i.e.
+ * ct1: a:b -> c:d
+ * ct2: c:d -> a:b
+ * for both directions.  If so, @ct2 should not have been created
+ * as the skb should have been picked up as ESTABLISHED flow.
+ * But ct1 was not yet committed to hash table before skb that created
+ * ct2 had arrived.
+ *
+ * Note we don't compare netns because ct entries in different net
+ * namespace cannot clash to begin with.
+ *
+ * @return: true if ct1 and ct2 are identical when swapping origin/reply.
+ */
+static bool
+nf_ct_match_reverse(const struct nf_conn *ct1, const struct nf_conn *ct2)
+{
+	u16 id1, id2;
+
+	if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+			       &ct2->tuplehash[IP_CT_DIR_REPLY].tuple))
+		return false;
+
+	if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
+			       &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+		return false;
+
+	id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_ORIGINAL);
+	id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_REPLY);
+	if (id1 != id2)
+		return false;
+
+	id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_REPLY);
+	id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL);
+
+	return id1 == id2;
+}
+
+static int nf_ct_can_merge(const struct nf_conn *ct,
+			   const struct nf_conn *loser_ct)
+{
+	return nf_ct_match(ct, loser_ct) ||
+	       nf_ct_match_reverse(ct, loser_ct);
+}
+
 /* caller must hold locks to prevent concurrent changes */
 static int __nf_ct_resolve_clash(struct sk_buff *skb,
 				 struct nf_conntrack_tuple_hash *h)
@@ -999,11 +1049,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
 
 	loser_ct = nf_ct_get(skb, &ctinfo);
 
-	if (nf_ct_is_dying(ct))
-		return NF_DROP;
-
-	if (((ct->status & IPS_NAT_DONE_MASK) == 0) ||
-	    nf_ct_match(ct, loser_ct)) {
+	if (nf_ct_can_merge(ct, loser_ct)) {
 		struct net *net = nf_ct_net(ct);
 
 		nf_conntrack_get(&ct->ct_general);