Subject: NET: skip GRO for foreign MAC addresses

For network drivers using napi_gro_receive, packets are run through GRO,
even when the destination MAC address does not match, and they're supposed
to be delivered to another host behind a different bridge port.

This can be very expensive, because for drivers without TSO or
scatter-gather, this can only be undone by copying the skb and
checksumming it again.

To be able to track foreign MAC addresses in an inexpensive way, create
a mask of changed bits in MAC addresses of upper devices. This allows
handling VLANs and bridge devices with different addresses (as long as
they are not too different).

Signed-off-by: Felix Fietkau

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4485,6 +4485,9 @@ static enum gro_result dev_gro_receive(s
 	enum gro_result ret;
 	int grow;
 
+	if (skb->gro_skip)
+		goto normal;
+
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
 
@@ -5762,6 +5765,48 @@ static void __netdev_adjacent_dev_unlink
 					   &upper_dev->adj_list.lower);
 }
 
+static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr,
+			       struct net_device *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->addr_len; i++)
+		mask[i] |= addr[i] ^ dev->dev_addr[i];
+}
+
+static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev,
+				struct net_device *lower)
+{
+	struct net_device *cur;
+	struct list_head *iter;
+
+	netdev_for_each_upper_dev_rcu(dev, cur, iter) {
+		__netdev_addr_mask(mask, cur->dev_addr, lower);
+		__netdev_upper_mask(mask, cur, lower);
+	}
+}
+
+static void __netdev_update_addr_mask(struct net_device *dev)
+{
+	unsigned char mask[MAX_ADDR_LEN];
+	struct net_device *cur;
+	struct list_head *iter;
+
+	memset(mask, 0, sizeof(mask));
+	__netdev_upper_mask(mask, dev, dev);
+	memcpy(dev->local_addr_mask, mask, dev->addr_len);
+
+	netdev_for_each_lower_dev(dev, cur, iter)
+		__netdev_update_addr_mask(cur);
+}
+
+static void netdev_update_addr_mask(struct net_device *dev)
+{
+	rcu_read_lock();
+	__netdev_update_addr_mask(dev);
+	rcu_read_unlock();
+}
+
 static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master,
 				   void *upper_priv, void *upper_info)
@@ -5960,6 +6005,8 @@ void netdev_upper_dev_unlink(struct net_
 	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
 		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
 
+	netdev_update_addr_mask(dev);
+
 	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 				      &changeupper_info.info);
 }
@@ -6560,6 +6607,7 @@ int dev_set_mac_address(struct net_devic
 	if (err)
 		return err;
 	dev->addr_assign_type = NET_ADDR_SET;
+	netdev_update_addr_mask(dev);
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
 	return 0;
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1748,6 +1748,8 @@ struct net_device {
 	struct netdev_hw_addr_list	mc;
 	struct netdev_hw_addr_list	dev_addrs;
 
+	unsigned char		local_addr_mask[MAX_ADDR_LEN];
+
 #ifdef CONFIG_SYSFS
 	struct kset		*queues_kset;
 #endif
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -742,7 +742,8 @@ struct sk_buff {
 #ifdef CONFIG_NET_SWITCHDEV
 	__u8			offload_fwd_mark:1;
 #endif
-	/* 2, 4 or 5 bit hole */
+	__u8			gro_skip:1;
+	/* 1, 3 or 4 bit hole */
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -143,6 +143,18 @@ u32 eth_get_headlen(void *data, unsigned
 }
 EXPORT_SYMBOL(eth_get_headlen);
 
+static inline bool
+eth_check_local_mask(const void *addr1, const void *addr2, const void *mask)
+{
+	const u16 *a1 = addr1;
+	const u16 *a2 = addr2;
+	const u16 *m = mask;
+
+	return (((a1[0] ^ a2[0]) & ~m[0]) |
+		((a1[1] ^ a2[1]) & ~m[1]) |
+		((a1[2] ^ a2[2]) & ~m[2]));
+}
+
 /**
  * eth_type_trans - determine the packet's protocol ID.
  * @skb: received socket data
@@ -171,8 +183,12 @@ __be16 eth_type_trans(struct sk_buff *sk
 			skb->pkt_type = PACKET_MULTICAST;
 	}
 	else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
-						   dev->dev_addr)))
+						   dev->dev_addr))) {
 		skb->pkt_type = PACKET_OTHERHOST;
+		if (eth_check_local_mask(eth->h_dest, dev->dev_addr,
+					 dev->local_addr_mask))
+			skb->gro_skip = 1;
+	}
 
 	/*
 	 * Some variants of DSA tagging don't have an ethertype field