kernel: merge upstream bgmac driver improvements

Signed-off-by: Felix Fietkau <nbd@openwrt.org>

SVN-Revision: 44978
This commit is contained in:
Felix Fietkau 2015-03-25 14:30:46 +00:00
parent 68ca1f285c
commit fafa3f57e3
3 changed files with 480 additions and 0 deletions

View file

@ -0,0 +1,24 @@
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 23 Mar 2015 02:40:06 +0100
Subject: [PATCH] bgmac: fix descriptor frame start/end definitions
The start-of-frame and end-of-frame bits were accidentally swapped.
In the current code it does not make any difference, since they are
always used together.
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -345,8 +345,8 @@
#define BGMAC_DESC_CTL0_EOT 0x10000000 /* End of ring */
#define BGMAC_DESC_CTL0_IOC 0x20000000 /* IRQ on complete */
-#define BGMAC_DESC_CTL0_SOF 0x40000000 /* Start of frame */
-#define BGMAC_DESC_CTL0_EOF 0x80000000 /* End of frame */
+#define BGMAC_DESC_CTL0_EOF 0x40000000 /* End of frame */
+#define BGMAC_DESC_CTL0_SOF 0x80000000 /* Start of frame */
#define BGMAC_DESC_CTL1_LEN 0x00001FFF
#define BGMAC_PHY_NOREGS 0x1E

View file

@ -0,0 +1,189 @@
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 23 Mar 2015 02:41:25 +0100
Subject: [PATCH] bgmac: implement GRO and use build_skb
This improves performance for routing and local rx
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -276,31 +276,31 @@ static int bgmac_dma_rx_skb_for_slot(str
struct bgmac_slot_info *slot)
{
struct device *dma_dev = bgmac->core->dma_dev;
- struct sk_buff *skb;
dma_addr_t dma_addr;
struct bgmac_rx_header *rx;
+ void *buf;
/* Alloc skb */
- skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
- if (!skb)
+ buf = netdev_alloc_frag(BGMAC_RX_ALLOC_SIZE);
+ if (!buf)
return -ENOMEM;
/* Poison - if everything goes fine, hardware will overwrite it */
- rx = (struct bgmac_rx_header *)skb->data;
+ rx = buf;
rx->len = cpu_to_le16(0xdead);
rx->flags = cpu_to_le16(0xbeef);
/* Map skb for the DMA */
- dma_addr = dma_map_single(dma_dev, skb->data,
- BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
if (dma_mapping_error(dma_dev, dma_addr)) {
bgmac_err(bgmac, "DMA mapping error\n");
- dev_kfree_skb(skb);
+ put_page(virt_to_head_page(buf));
return -ENOMEM;
}
/* Update the slot */
- slot->skb = skb;
+ slot->buf = buf;
slot->dma_addr = dma_addr;
return 0;
@@ -343,8 +343,9 @@ static int bgmac_dma_rx_read(struct bgma
while (ring->start != ring->end) {
struct device *dma_dev = bgmac->core->dma_dev;
struct bgmac_slot_info *slot = &ring->slots[ring->start];
- struct sk_buff *skb = slot->skb;
- struct bgmac_rx_header *rx;
+ struct bgmac_rx_header *rx = slot->buf;
+ struct sk_buff *skb;
+ void *buf = slot->buf;
u16 len, flags;
/* Unmap buffer to make it accessible to the CPU */
@@ -352,7 +353,6 @@ static int bgmac_dma_rx_read(struct bgma
BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
/* Get info from the header */
- rx = (struct bgmac_rx_header *)skb->data;
len = le16_to_cpu(rx->len);
flags = le16_to_cpu(rx->flags);
@@ -393,12 +393,13 @@ static int bgmac_dma_rx_read(struct bgma
dma_unmap_single(dma_dev, old_dma_addr,
BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
skb_checksum_none_assert(skb);
skb->protocol = eth_type_trans(skb, bgmac->net_dev);
- netif_receive_skb(skb);
+ napi_gro_receive(&bgmac->napi, skb);
handled++;
} while (0);
@@ -434,12 +435,11 @@ static bool bgmac_dma_unaligned(struct b
return false;
}
-static void bgmac_dma_ring_free(struct bgmac *bgmac,
- struct bgmac_dma_ring *ring)
+static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
+ struct bgmac_dma_ring *ring)
{
struct device *dma_dev = bgmac->core->dma_dev;
struct bgmac_slot_info *slot;
- int size;
int i;
for (i = 0; i < ring->num_slots; i++) {
@@ -451,23 +451,55 @@ static void bgmac_dma_ring_free(struct b
dev_kfree_skb(slot->skb);
}
}
+}
- if (ring->cpu_base) {
- /* Free ring of descriptors */
- size = ring->num_slots * sizeof(struct bgmac_dma_desc);
- dma_free_coherent(dma_dev, size, ring->cpu_base,
- ring->dma_base);
+static void bgmac_dma_rx_ring_free(struct bgmac *bgmac,
+ struct bgmac_dma_ring *ring)
+{
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct bgmac_slot_info *slot;
+ int i;
+
+ for (i = 0; i < ring->num_slots; i++) {
+ slot = &ring->slots[i];
+ if (!slot->buf)
+ continue;
+
+ if (slot->dma_addr)
+ dma_unmap_single(dma_dev, slot->dma_addr,
+ BGMAC_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ put_page(virt_to_head_page(slot->buf));
}
}
+static void bgmac_dma_ring_desc_free(struct bgmac *bgmac,
+ struct bgmac_dma_ring *ring)
+{
+ struct device *dma_dev = bgmac->core->dma_dev;
+ int size;
+
+ if (!ring->cpu_base)
+ return;
+
+ /* Free ring of descriptors */
+ size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+ dma_free_coherent(dma_dev, size, ring->cpu_base,
+ ring->dma_base);
+}
+
static void bgmac_dma_free(struct bgmac *bgmac)
{
int i;
- for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
- bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]);
- for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
- bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]);
+ for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+ bgmac_dma_tx_ring_free(bgmac, &bgmac->tx_ring[i]);
+ bgmac_dma_ring_desc_free(bgmac, &bgmac->tx_ring[i]);
+ }
+ for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+ bgmac_dma_rx_ring_free(bgmac, &bgmac->rx_ring[i]);
+ bgmac_dma_ring_desc_free(bgmac, &bgmac->rx_ring[i]);
+ }
}
static int bgmac_dma_alloc(struct bgmac *bgmac)
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -362,6 +362,8 @@
#define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */
#define BGMAC_RX_MAX_FRAME_SIZE 1536 /* Copied from b44/tg3 */
#define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
+#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define BGMAC_BFL_ENETROBO 0x0010 /* has ephy roboswitch spi */
#define BGMAC_BFL_ENETADM 0x0080 /* has ADMtek switch */
@@ -383,7 +385,10 @@
#define ETHER_MAX_LEN 1518
struct bgmac_slot_info {
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ void *buf;
+ };
dma_addr_t dma_addr;
};

View file

@ -0,0 +1,267 @@
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 23 Mar 2015 02:42:26 +0100
Subject: [PATCH] bgmac: implement scatter/gather support
Always use software checksumming, since the hardware does not have any
checksum offload support.
This significantly improves local TCP tx performance.
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
}
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+ int i, int len, u32 ctl0)
+{
+ struct bgmac_slot_info *slot;
+ struct bgmac_dma_desc *dma_desc;
+ u32 ctl1;
+
+ if (i == ring->num_slots - 1)
+ ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+ ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+ slot = &ring->slots[i];
+ dma_desc = &ring->cpu_base[i];
+ dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+ dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+ dma_desc->ctl0 = cpu_to_le32(ctl0);
+ dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
struct bgmac_dma_ring *ring,
struct sk_buff *skb)
{
struct device *dma_dev = bgmac->core->dma_dev;
struct net_device *net_dev = bgmac->net_dev;
- struct bgmac_dma_desc *dma_desc;
- struct bgmac_slot_info *slot;
- u32 ctl0, ctl1;
+ struct bgmac_slot_info *slot = &ring->slots[ring->end];
int free_slots;
+ int nr_frags;
+ u32 flags;
+ int index = ring->end;
+ int i;
if (skb->len > BGMAC_DESC_CTL1_LEN) {
bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
- goto err_stop_drop;
+ goto err_drop;
}
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ skb_checksum_help(skb);
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+
if (ring->start <= ring->end)
free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
else
free_slots = ring->start - ring->end;
- if (free_slots == 1) {
+
+ if (free_slots <= nr_frags + 1) {
bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
netif_stop_queue(net_dev);
return NETDEV_TX_BUSY;
}
- slot = &ring->slots[ring->end];
- slot->skb = skb;
- slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+ slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
DMA_TO_DEVICE);
- if (dma_mapping_error(dma_dev, slot->dma_addr)) {
- bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
- ring->mmio_base);
- goto err_stop_drop;
- }
+ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+ goto err_dma_head;
- ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
- if (ring->end == ring->num_slots - 1)
- ctl0 |= BGMAC_DESC_CTL0_EOT;
- ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+ flags = BGMAC_DESC_CTL0_SOF;
+ if (!nr_frags)
+ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+ bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+ flags = 0;
+
+ for (i = 0; i < nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+ int len = skb_frag_size(frag);
+
+ index = (index + 1) % BGMAC_TX_RING_SLOTS;
+ slot = &ring->slots[index];
+ slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+ len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+ goto err_dma;
- dma_desc = ring->cpu_base;
- dma_desc += ring->end;
- dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
- dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
- dma_desc->ctl0 = cpu_to_le32(ctl0);
- dma_desc->ctl1 = cpu_to_le32(ctl1);
+ if (i == nr_frags - 1)
+ flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+ bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+ }
+
+ slot->skb = skb;
netdev_sent_queue(net_dev, skb->len);
@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
/* Increase ring->end to point empty slot. We tell hardware the first
* slot it should *not* read.
*/
- if (++ring->end >= BGMAC_TX_RING_SLOTS)
- ring->end = 0;
+ ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
ring->index_base +
ring->end * sizeof(struct bgmac_dma_desc));
- /* Always keep one slot free to allow detecting bugged calls. */
- if (--free_slots == 1)
+ free_slots -= nr_frags + 1;
+ if (free_slots < 8)
netif_stop_queue(net_dev);
return NETDEV_TX_OK;
-err_stop_drop:
- netif_stop_queue(net_dev);
+err_dma:
+ dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+ DMA_TO_DEVICE);
+
+ while (i > 0) {
+ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+ struct bgmac_slot_info *slot = &ring->slots[index];
+ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+ dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+ }
+
+err_dma_head:
+ bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+ ring->mmio_base);
+
+err_drop:
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
while (ring->start != empty_slot) {
struct bgmac_slot_info *slot = &ring->slots[ring->start];
+ u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+ int len = ctl1 & BGMAC_DESC_CTL1_LEN;
- if (slot->skb) {
+ if (!slot->dma_addr) {
+ bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+ ring->start, ring->end);
+ goto next;
+ }
+
+ if (ctl1 & BGMAC_DESC_CTL0_SOF)
/* Unmap no longer used buffer */
- dma_unmap_single(dma_dev, slot->dma_addr,
- slot->skb->len, DMA_TO_DEVICE);
- slot->dma_addr = 0;
+ dma_unmap_single(dma_dev, slot->dma_addr, len,
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dma_dev, slot->dma_addr, len,
+ DMA_TO_DEVICE);
+ if (slot->skb) {
bytes_compl += slot->skb->len;
pkts_compl++;
/* Free memory! :) */
dev_kfree_skb(slot->skb);
slot->skb = NULL;
- } else {
- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
- ring->start, ring->end);
}
+next:
+ slot->dma_addr = 0;
if (++ring->start >= BGMAC_TX_RING_SLOTS)
ring->start = 0;
freed = true;
}
+ if (!pkts_compl)
+ return;
+
netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
- if (freed && netif_queue_stopped(bgmac->net_dev))
+ if (netif_queue_stopped(bgmac->net_dev))
netif_wake_queue(bgmac->net_dev);
}
@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
struct bgmac_dma_ring *ring)
{
struct device *dma_dev = bgmac->core->dma_dev;
+ struct bgmac_dma_desc *dma_desc = ring->cpu_base;
struct bgmac_slot_info *slot;
int i;
for (i = 0; i < ring->num_slots; i++) {
+ int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
slot = &ring->slots[i];
- if (slot->skb) {
- if (slot->dma_addr)
- dma_unmap_single(dma_dev, slot->dma_addr,
- slot->skb->len, DMA_TO_DEVICE);
- dev_kfree_skb(slot->skb);
- }
+ dev_kfree_skb(slot->skb);
+
+ if (!slot->dma_addr)
+ continue;
+
+ if (slot->skb)
+ dma_unmap_single(dma_dev, slot->dma_addr,
+ len, DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dma_dev, slot->dma_addr,
+ len, DMA_TO_DEVICE);
}
}
@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
goto err_dma_free;
}
+ net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ net_dev->hw_features = net_dev->features;
+ net_dev->vlan_features = net_dev->features;
+
err = register_netdev(bgmac->net_dev);
if (err) {
bgmac_err(bgmac, "Cannot register net device\n");