ar71xx: split packets into multiple descriptors on ar716x

This improves performance when doing concurrent rx/tx on a single
ethernet MAC, e.g. when routing between VLANs.

Fixes #13072

Signed-off-by: Felix Fietkau <nbd@openwrt.org>

SVN-Revision: 42328
Felix Fietkau, 2014-08-29 19:42:08 +00:00
commit 2c680151e4, parent c23e0ed691
3 changed files with 105 additions and 28 deletions

ag71xx.h

@@ -52,10 +52,13 @@
 #define AG71XX_TX_MTU_LEN	1540
 
-#define AG71XX_TX_RING_SIZE_DEFAULT	32
+#define AG71XX_TX_RING_SPLIT		256
+#define AG71XX_TX_RING_DS_PER_PKT	DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
+						     AG71XX_TX_RING_SPLIT)
+#define AG71XX_TX_RING_SIZE_DEFAULT	48
 #define AG71XX_RX_RING_SIZE_DEFAULT	128
 
-#define AG71XX_TX_RING_SIZE_MAX	32
+#define AG71XX_TX_RING_SIZE_MAX	48
 #define AG71XX_RX_RING_SIZE_MAX	128
 
 #ifdef CONFIG_AG71XX_DEBUG
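
The new constants work out to seven descriptors per full-size frame. A quick
user-space check of that arithmetic (the macros are copied from the hunk above;
the printf scaffolding is illustration only):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define AG71XX_TX_MTU_LEN	1540
#define AG71XX_TX_RING_SPLIT	256
#define AG71XX_TX_RING_DS_PER_PKT	DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
						     AG71XX_TX_RING_SPLIT)

int main(void)
{
	/* ceil(1540 / 256) = 7 descriptors for a full-size frame */
	printf("descriptors per packet: %d\n", AG71XX_TX_RING_DS_PER_PKT);

	/* the 48-entry default becomes 48 * 7 = 336 descriptors once
	 * ag71xx_probe() scales the ring (see the last hunk below) */
	printf("default tx ring in descriptors: %d\n",
	       48 * AG71XX_TX_RING_DS_PER_PKT);
	return 0;
}
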
@@ -99,7 +102,8 @@ struct ag71xx_ring {
 	struct ag71xx_buf	*buf;
 	u8			*descs_cpu;
 	dma_addr_t		descs_dma;
-	unsigned int		desc_size;
+	u16			desc_split;
+	u16			desc_size;
 	unsigned int		curr;
 	unsigned int		dirty;
 	unsigned int		size;
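
desc_size drops from unsigned int to u16, presumably so the new desc_split
fits beside it without growing the ring structure; a standalone sketch of that
packing assumption, with uint16_t standing in for the kernel's u16:

#include <assert.h>
#include <stdint.h>

struct ring_sizes {
	uint16_t desc_split;	/* 0 disables tx splitting */
	uint16_t desc_size;	/* bytes per hardware descriptor */
};

/* the pair occupies the 4 bytes the old unsigned int field used */
static_assert(sizeof(struct ring_sizes) == sizeof(uint32_t),
	      "desc_split and desc_size pack into one word");

int main(void)
{
	return 0;
}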

ag71xx_ethtool.c

@@ -75,6 +75,9 @@ static void ag71xx_ethtool_get_ringparam(struct net_device *dev,
 	er->rx_pending = ag->rx_ring.size;
 	er->rx_mini_pending = 0;
 	er->rx_jumbo_pending = 0;
+
+	if (ag->tx_ring.desc_split)
+		er->tx_pending /= AG71XX_TX_RING_DS_PER_PKT;
 }
 
 static int ag71xx_ethtool_set_ringparam(struct net_device *dev,
@@ -103,6 +106,9 @@ static int ag71xx_ethtool_set_ringparam(struct net_device *dev,
 		return err;
 	}
 
+	if (ag->tx_ring.desc_split)
+		tx_size *= AG71XX_TX_RING_DS_PER_PKT;
+
 	ag->tx_ring.size = tx_size;
 	ag->rx_ring.size = rx_size;
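
Internally the tx ring size is now counted in descriptors, while ethtool keeps
speaking in packets; the two hunks above form an exact round trip, as this
small model shows (the value 7 assumes the default 1540/256 split):

#include <assert.h>

#define AG71XX_TX_RING_DS_PER_PKT 7	/* DIV_ROUND_UP(1540, 256) */

int main(void)
{
	unsigned int user_tx = 32;	/* packets requested via ethtool -G */

	/* set_ringparam scales up to descriptors... */
	unsigned int ring_size = user_tx * AG71XX_TX_RING_DS_PER_PKT;

	/* ...get_ringparam scales back down to packets */
	unsigned int reported = ring_size / AG71XX_TX_RING_DS_PER_PKT;

	assert(reported == user_tx);	/* exact round trip */
	return 0;
}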

ag71xx_main.c

@@ -536,6 +536,7 @@ void ag71xx_link_adjust(struct ag71xx *ag)
 	u32 cfg2;
 	u32 ifctl;
 	u32 fifo5;
+	u32 fifo3;
 
 	if (!ag->link) {
 		ag71xx_hw_stop(ag);
@@ -576,11 +577,18 @@ void ag71xx_link_adjust(struct ag71xx *ag)
 	}
 
 	if (pdata->is_ar91xx)
-		ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x00780fff);
+		fifo3 = 0x00780fff;
 	else if (pdata->is_ar724x)
-		ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, pdata->fifo_cfg3);
+		fifo3 = pdata->fifo_cfg3;
 	else
-		ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x008001ff);
+		fifo3 = 0x008001ff;
+
+	if (ag->tx_ring.desc_split) {
+		fifo3 &= 0xffff;
+		fifo3 |= ((2048 - ag->tx_ring.desc_split) / 4) << 16;
+	}
+
+	ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, fifo3);
 
 	if (pdata->set_speed)
 		pdata->set_speed(ag->speed);
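
With desc_split set, the low halfword of FIFO_CFG3 keeps the per-SoC default
and the high halfword is reprogrammed from the split size. Worked out for the
remaining-SoCs default 0x008001ff and desc_split = 256 (a standalone check,
not driver code):

#include <stdio.h>

int main(void)
{
	unsigned int fifo3 = 0x008001ff;	/* non-91xx/non-724x default */
	unsigned int desc_split = 256;		/* AG71XX_TX_RING_SPLIT */

	fifo3 &= 0xffff;			/* keep the low halfword, 0x01ff */
	fifo3 |= ((2048 - desc_split) / 4) << 16;	/* 1792 / 4 = 0x1c0 */

	printf("FIFO_CFG3 = 0x%08x\n", fifo3);	/* prints 0x01c001ff */
	return 0;
}
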
@@ -675,6 +683,49 @@ static int ag71xx_stop(struct net_device *dev)
 	return 0;
 }
 
+static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len)
+{
+	int i;
+	struct ag71xx_desc *desc;
+	int ndesc = 0;
+	int split = ring->desc_split;
+
+	if (!split)
+		split = len;
+
+	while (len > 0) {
+		unsigned int cur_len = len;
+
+		i = (ring->curr + ndesc) % ring->size;
+		desc = ring->buf[i].desc;
+
+		if (!ag71xx_desc_empty(desc))
+			return -1;
+
+		if (cur_len > split) {
+			cur_len = split;
+			/* TX will fail if the remaining subframe is too small */
+			if (len <= split + 4)
+				cur_len -= 4;
+		}
+
+		desc->data = addr;
+		addr += cur_len;
+		len -= cur_len;
+
+		if (len > 0)
+			cur_len |= DESC_MORE;
+
+		/* prevent early tx attempt of this descriptor */
+		if (!ndesc)
+			cur_len |= DESC_EMPTY;
+
+		desc->ctrl = cur_len;
+		ndesc++;
+	}
+
+	return ndesc;
+}
+
 static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
 					  struct net_device *dev)
 {
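
To see what ag71xx_fill_dma_desc() produces, here is a user-space replay of
its length loop for a 1540-byte frame and a 256-byte split, with the ring and
flag bookkeeping stripped out; note how the len <= split + 4 check trims the
second-to-last segment so the final one stays above 4 bytes:

#include <stdio.h>

int main(void)
{
	int len = 1540, split = 256, ndesc = 0;

	while (len > 0) {
		int cur_len = len;

		if (cur_len > split) {
			cur_len = split;
			/* trim so the final segment stays > 4 bytes */
			if (len <= split + 4)
				cur_len -= 4;
		}
		len -= cur_len;
		printf("desc %d: %d bytes%s\n", ndesc++, cur_len,
		       len ? " +DESC_MORE" : "");
	}
	/* prints 256, 256, 256, 256, 256, 252, 8: seven descriptors,
	 * matching AG71XX_TX_RING_DS_PER_PKT */
	return 0;
}
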
@@ -682,18 +733,12 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
 	struct ag71xx_ring *ring = &ag->tx_ring;
 	struct ag71xx_desc *desc;
 	dma_addr_t dma_addr;
-	int i;
-
-	i = ring->curr % ring->size;
-	desc = ring->buf[i].desc;
-
-	if (!ag71xx_desc_empty(desc))
-		goto err_drop;
+	int i, n, ring_min;
 
 	if (ag71xx_has_ar8216(ag))
 		ag71xx_add_ar8216_header(ag, skb);
 
-	if (skb->len <= 0) {
+	if (skb->len <= 4) {
 		DBG("%s: packet len is too small\n", ag->dev->name);
 		goto err_drop;
 	}
@@ -701,21 +746,33 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
 	dma_addr = dma_map_single(&dev->dev, skb->data, skb->len,
 				  DMA_TO_DEVICE);
 
-	netdev_sent_queue(dev, skb->len);
+	i = ring->curr % ring->size;
+	desc = ring->buf[i].desc;
+
+	/* setup descriptor fields */
+	n = ag71xx_fill_dma_desc(ring, (u32) dma_addr, skb->len & ag->desc_pktlen_mask);
+	if (n < 0)
+		goto err_drop_unmap;
+
+	i = (ring->curr + n - 1) % ring->size;
 	ring->buf[i].len = skb->len;
 	ring->buf[i].skb = skb;
 	ring->buf[i].timestamp = jiffies;
 
-	/* setup descriptor fields */
-	desc->data = (u32) dma_addr;
-	desc->ctrl = skb->len & ag->desc_pktlen_mask;
+	netdev_sent_queue(dev, skb->len);
+	desc->ctrl &= ~DESC_EMPTY;
+	ring->curr += n;
 
 	/* flush descriptor */
 	wmb();
 
-	ring->curr++;
-	if (ring->curr == (ring->dirty + ring->size)) {
-		DBG("%s: tx queue full\n", ag->dev->name);
+	ring_min = 2;
+	if (ring->desc_split)
+		ring_min *= AG71XX_TX_RING_DS_PER_PKT;
+
+	if (ring->curr - ring->dirty >= ring->size - ring_min) {
+		DBG("%s: tx queue full\n", dev->name);
 		netif_stop_queue(dev);
 	}
@@ -726,6 +783,9 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
+err_drop_unmap:
+	dma_unmap_single(&dev->dev, dma_addr, skb->len, DMA_TO_DEVICE);
+
 err_drop:
 	dev->stats.tx_dropped++;
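
Two details above are easy to miss: the first descriptor is created with
DESC_EMPTY so the hardware cannot start on a half-built chain, and only after
everything is in place does desc->ctrl &= ~DESC_EMPTY release it; and the
queue is now stopped while ring_min descriptors of headroom remain, rather
than only when the ring is completely full. A toy check of that threshold
with the default sizes (the in-flight count is assumed, not measured):

#include <stdio.h>

#define AG71XX_TX_RING_DS_PER_PKT 7	/* DIV_ROUND_UP(1540, 256) */

int main(void)
{
	unsigned int size = 48 * AG71XX_TX_RING_DS_PER_PKT;	/* 336 descriptors */
	unsigned int ring_min = 2 * AG71XX_TX_RING_DS_PER_PKT;	/* 14 kept free */
	unsigned int curr = 330, dirty = 0;	/* 330 descriptors in flight */

	/* the same comparison as in the hunk above */
	if (curr - dirty >= size - ring_min)
		printf("stop queue: %u of %u descriptors used\n",
		       curr - dirty, size);
	return 0;
}
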
@@ -843,7 +903,6 @@ static int ag71xx_tx_packets(struct ag71xx *ag)
 		unsigned int i = ring->dirty % ring->size;
 		struct ag71xx_desc *desc = ring->buf[i].desc;
 		struct sk_buff *skb = ring->buf[i].skb;
-		int len = ring->buf[i].len;
 
 		if (!ag71xx_desc_empty(desc)) {
 			if (pdata->is_ar7240 &&
@@ -854,19 +913,22 @@ static int ag71xx_tx_packets(struct ag71xx *ag)
 		ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
 
-		bytes_compl += len;
-		ag->dev->stats.tx_bytes += len;
-		ag->dev->stats.tx_packets++;
+		if (skb) {
+			dev_kfree_skb_any(skb);
+			ring->buf[i].skb = NULL;
 
-		dev_kfree_skb_any(skb);
-		ring->buf[i].skb = NULL;
+			bytes_compl += ring->buf[i].len;
+			sent++;
+		}
 
 		ring->dirty++;
-		sent++;
 	}
 
 	DBG("%s: %d packets sent out\n", ag->dev->name, sent);
 
+	ag->dev->stats.tx_bytes += bytes_compl;
+	ag->dev->stats.tx_packets += sent;
+
 	if (!sent)
 		return 0;
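
Since a split packet owns several descriptors but only its last one carries
the skb pointer, the reap loop now frees and counts once per packet rather
than once per descriptor. A toy model of one completed 1540-byte frame spread
over seven descriptors (the ring contents are made up for illustration):

#include <stdio.h>

struct buf {
	void *skb;	/* non-NULL only on a packet's last descriptor */
	int len;
};

int main(void)
{
	static char pkt;		/* stand-in for a real sk_buff */
	struct buf ring[7] = { { 0 } };
	int sent = 0, bytes_compl = 0;
	int i;

	ring[6].skb = &pkt;		/* last descriptor of the frame */
	ring[6].len = 1540;

	for (i = 0; i < 7; i++) {
		if (!ring[i].skb)	/* intermediate descriptor: skip */
			continue;
		bytes_compl += ring[i].len;
		sent++;
	}
	printf("%d packet(s), %d bytes completed\n", sent, bytes_compl);
	return 0;
}
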
@@ -1195,6 +1257,11 @@ static int ag71xx_probe(struct platform_device *pdev)
 	ag->max_frame_len = pdata->max_frame_len;
 	ag->desc_pktlen_mask = pdata->desc_pktlen_mask;
 
+	if (!pdata->is_ar724x && !pdata->is_ar91xx) {
+		ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT;
+		ag->tx_ring.size *= AG71XX_TX_RING_DS_PER_PKT;
+	}
+
 	ag->stop_desc = dma_alloc_coherent(NULL,
 		sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL);