forcedeth: ring access

This patch modifys ring access by using pointers. This avoids computing
the current index and avoids accessing the base address of the rings.

Signed-Off-By: Ayaz Abdulla <aabdulla@nvidia.com>

Signed-off-by: Jeff Garzik <jeff@garzik.org>
This commit is contained in:
Ayaz Abdulla 2007-01-09 13:30:07 -05:00 committed by Jeff Garzik
parent d2f7841277
commit 761fcd9e3e

View File

@ -691,6 +691,12 @@ static const struct register_test nv_registers_test[] = {
{ 0,0 } { 0,0 }
}; };
struct nv_skb_map {
struct sk_buff *skb;
dma_addr_t dma;
unsigned int dma_len;
};
/* /*
* SMP locking: * SMP locking:
* All hardware access under dev->priv->lock, except the performance * All hardware access under dev->priv->lock, except the performance
@ -741,10 +747,12 @@ struct fe_priv {
/* rx specific fields. /* rx specific fields.
* Locking: Within irq hander or disable_irq+spin_lock(&np->lock); * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
*/ */
union ring_type get_rx, put_rx, first_rx, last_rx;
struct nv_skb_map *get_rx_ctx, *put_rx_ctx;
struct nv_skb_map *first_rx_ctx, *last_rx_ctx;
struct nv_skb_map *rx_skb;
union ring_type rx_ring; union ring_type rx_ring;
unsigned int cur_rx, refill_rx;
struct sk_buff **rx_skbuff;
dma_addr_t *rx_dma;
unsigned int rx_buf_sz; unsigned int rx_buf_sz;
unsigned int pkt_limit; unsigned int pkt_limit;
struct timer_list oom_kick; struct timer_list oom_kick;
@ -761,11 +769,12 @@ struct fe_priv {
/* /*
* tx specific fields. * tx specific fields.
*/ */
union ring_type get_tx, put_tx, first_tx, last_tx;
struct nv_skb_map *get_tx_ctx, *put_tx_ctx;
struct nv_skb_map *first_tx_ctx, *last_tx_ctx;
struct nv_skb_map *tx_skb;
union ring_type tx_ring; union ring_type tx_ring;
unsigned int next_tx, nic_tx;
struct sk_buff **tx_skbuff;
dma_addr_t *tx_dma;
unsigned int *tx_dma_len;
u32 tx_flags; u32 tx_flags;
int tx_ring_size; int tx_ring_size;
int tx_limit_start; int tx_limit_start;
@ -921,16 +930,10 @@ static void free_rings(struct net_device *dev)
pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size), pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size),
np->rx_ring.ex, np->ring_addr); np->rx_ring.ex, np->ring_addr);
} }
if (np->rx_skbuff) if (np->rx_skb)
kfree(np->rx_skbuff); kfree(np->rx_skb);
if (np->rx_dma) if (np->tx_skb)
kfree(np->rx_dma); kfree(np->tx_skb);
if (np->tx_skbuff)
kfree(np->tx_skbuff);
if (np->tx_dma)
kfree(np->tx_dma);
if (np->tx_dma_len)
kfree(np->tx_dma_len);
} }
static int using_multi_irqs(struct net_device *dev) static int using_multi_irqs(struct net_device *dev)
@ -1304,43 +1307,60 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
static int nv_alloc_rx(struct net_device *dev) static int nv_alloc_rx(struct net_device *dev)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
unsigned int refill_rx = np->refill_rx; union ring_type less_rx;
int nr;
while (np->cur_rx != refill_rx) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
less_rx.orig = np->get_rx.orig;
if (less_rx.orig-- == np->first_rx.orig)
less_rx.orig = np->last_rx.orig;
} else {
less_rx.ex = np->get_rx.ex;
if (less_rx.ex-- == np->first_rx.ex)
less_rx.ex = np->last_rx.ex;
}
while (1) {
struct sk_buff *skb; struct sk_buff *skb;
nr = refill_rx % np->rx_ring_size; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (np->rx_skbuff[nr] == NULL) { if (np->put_rx.orig == less_rx.orig)
break;
} else {
if (np->put_rx.ex == less_rx.ex)
break;
}
if (np->put_rx_ctx->skb == NULL) {
skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD); skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
if (!skb) if (!skb)
break; return 1;
skb->dev = dev; skb->dev = dev;
np->rx_skbuff[nr] = skb; np->put_rx_ctx->skb = skb;
} else { } else {
skb = np->rx_skbuff[nr]; skb = np->put_rx_ctx->skb;
} }
np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data, np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
skb->end-skb->data, PCI_DMA_FROMDEVICE); skb->end-skb->data, PCI_DMA_FROMDEVICE);
np->put_rx_ctx->dma_len = skb->end-skb->data;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->rx_ring.orig[nr].buf = cpu_to_le32(np->rx_dma[nr]); np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
wmb(); wmb();
np->rx_ring.orig[nr].flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL); np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
if (np->put_rx.orig++ == np->last_rx.orig)
np->put_rx.orig = np->first_rx.orig;
} else { } else {
np->rx_ring.ex[nr].bufhigh = cpu_to_le64(np->rx_dma[nr]) >> 32; np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
np->rx_ring.ex[nr].buflow = cpu_to_le64(np->rx_dma[nr]) & 0x0FFFFFFFF; np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
wmb(); wmb();
np->rx_ring.ex[nr].flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL); np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
if (np->put_rx.ex++ == np->last_rx.ex)
np->put_rx.ex = np->first_rx.ex;
} }
dprintk(KERN_DEBUG "%s: nv_alloc_rx: Packet %d marked as Available\n", if (np->put_rx_ctx++ == np->last_rx_ctx)
dev->name, refill_rx); np->put_rx_ctx = np->first_rx_ctx;
refill_rx++;
} }
np->refill_rx = refill_rx;
if (np->cur_rx - refill_rx == np->rx_ring_size)
return 1;
return 0; return 0;
} }
@ -1388,29 +1408,53 @@ static void nv_init_rx(struct net_device *dev)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
int i; int i;
np->get_rx = np->put_rx = np->first_rx = np->rx_ring;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
np->last_rx.orig = &np->rx_ring.orig[np->rx_ring_size-1];
else
np->last_rx.ex = &np->rx_ring.ex[np->rx_ring_size-1];
np->get_rx_ctx = np->put_rx_ctx = np->first_rx_ctx = np->rx_skb;
np->last_rx_ctx = &np->rx_skb[np->rx_ring_size-1];
np->cur_rx = np->rx_ring_size; for (i = 0; i < np->rx_ring_size; i++) {
np->refill_rx = 0; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
for (i = 0; i < np->rx_ring_size; i++)
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
np->rx_ring.orig[i].flaglen = 0; np->rx_ring.orig[i].flaglen = 0;
else np->rx_ring.orig[i].buf = 0;
} else {
np->rx_ring.ex[i].flaglen = 0; np->rx_ring.ex[i].flaglen = 0;
np->rx_ring.ex[i].txvlan = 0;
np->rx_ring.ex[i].bufhigh = 0;
np->rx_ring.ex[i].buflow = 0;
}
np->rx_skb[i].skb = NULL;
np->rx_skb[i].dma = 0;
}
} }
static void nv_init_tx(struct net_device *dev) static void nv_init_tx(struct net_device *dev)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
int i; int i;
np->get_tx = np->put_tx = np->first_tx = np->tx_ring;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
np->last_tx.orig = &np->tx_ring.orig[np->tx_ring_size-1];
else
np->last_tx.ex = &np->tx_ring.ex[np->tx_ring_size-1];
np->get_tx_ctx = np->put_tx_ctx = np->first_tx_ctx = np->tx_skb;
np->last_tx_ctx = &np->tx_skb[np->tx_ring_size-1];
np->next_tx = np->nic_tx = 0;
for (i = 0; i < np->tx_ring_size; i++) { for (i = 0; i < np->tx_ring_size; i++) {
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->tx_ring.orig[i].flaglen = 0; np->tx_ring.orig[i].flaglen = 0;
else np->tx_ring.orig[i].buf = 0;
} else {
np->tx_ring.ex[i].flaglen = 0; np->tx_ring.ex[i].flaglen = 0;
np->tx_skbuff[i] = NULL; np->tx_ring.ex[i].txvlan = 0;
np->tx_dma[i] = 0; np->tx_ring.ex[i].bufhigh = 0;
np->tx_ring.ex[i].buflow = 0;
}
np->tx_skb[i].skb = NULL;
np->tx_skb[i].dma = 0;
} }
} }
@ -1421,23 +1465,19 @@ static int nv_init_ring(struct net_device *dev)
return nv_alloc_rx(dev); return nv_alloc_rx(dev);
} }
static int nv_release_txskb(struct net_device *dev, unsigned int skbnr) static int nv_release_txskb(struct net_device *dev, struct nv_skb_map* tx_skb)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d\n", if (tx_skb->dma) {
dev->name, skbnr); pci_unmap_page(np->pci_dev, tx_skb->dma,
tx_skb->dma_len,
if (np->tx_dma[skbnr]) {
pci_unmap_page(np->pci_dev, np->tx_dma[skbnr],
np->tx_dma_len[skbnr],
PCI_DMA_TODEVICE); PCI_DMA_TODEVICE);
np->tx_dma[skbnr] = 0; tx_skb->dma = 0;
} }
if (tx_skb->skb) {
if (np->tx_skbuff[skbnr]) { dev_kfree_skb_any(tx_skb->skb);
dev_kfree_skb_any(np->tx_skbuff[skbnr]); tx_skb->skb = NULL;
np->tx_skbuff[skbnr] = NULL;
return 1; return 1;
} else { } else {
return 0; return 0;
@ -1450,11 +1490,16 @@ static void nv_drain_tx(struct net_device *dev)
unsigned int i; unsigned int i;
for (i = 0; i < np->tx_ring_size; i++) { for (i = 0; i < np->tx_ring_size; i++) {
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->tx_ring.orig[i].flaglen = 0; np->tx_ring.orig[i].flaglen = 0;
else np->tx_ring.orig[i].buf = 0;
} else {
np->tx_ring.ex[i].flaglen = 0; np->tx_ring.ex[i].flaglen = 0;
if (nv_release_txskb(dev, i)) np->tx_ring.ex[i].txvlan = 0;
np->tx_ring.ex[i].bufhigh = 0;
np->tx_ring.ex[i].buflow = 0;
}
if (nv_release_txskb(dev, &np->tx_skb[i]))
np->stats.tx_dropped++; np->stats.tx_dropped++;
} }
} }
@ -1463,18 +1508,24 @@ static void nv_drain_rx(struct net_device *dev)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
int i; int i;
for (i = 0; i < np->rx_ring_size; i++) { for (i = 0; i < np->rx_ring_size; i++) {
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->rx_ring.orig[i].flaglen = 0; np->rx_ring.orig[i].flaglen = 0;
else np->rx_ring.orig[i].buf = 0;
} else {
np->rx_ring.ex[i].flaglen = 0; np->rx_ring.ex[i].flaglen = 0;
np->rx_ring.ex[i].txvlan = 0;
np->rx_ring.ex[i].bufhigh = 0;
np->rx_ring.ex[i].buflow = 0;
}
wmb(); wmb();
if (np->rx_skbuff[i]) { if (np->rx_skb[i].skb) {
pci_unmap_single(np->pci_dev, np->rx_dma[i], pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
np->rx_skbuff[i]->end-np->rx_skbuff[i]->data, np->rx_skb[i].skb->end-np->rx_skb[i].skb->data,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
dev_kfree_skb(np->rx_skbuff[i]); dev_kfree_skb(np->rx_skb[i].skb);
np->rx_skbuff[i] = NULL; np->rx_skb[i].skb = NULL;
} }
} }
} }
@ -1485,6 +1536,11 @@ static void drain_ring(struct net_device *dev)
nv_drain_rx(dev); nv_drain_rx(dev);
} }
static inline u32 nv_get_empty_tx_slots(struct fe_priv *np)
{
return (u32)(np->tx_ring_size - ((np->tx_ring_size + (np->put_tx_ctx - np->get_tx_ctx)) % np->tx_ring_size));
}
/* /*
* nv_start_xmit: dev->hard_start_xmit function * nv_start_xmit: dev->hard_start_xmit function
* Called with netif_tx_lock held. * Called with netif_tx_lock held.
@ -1495,14 +1551,17 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
u32 tx_flags = 0; u32 tx_flags = 0;
u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET); u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
unsigned int fragments = skb_shinfo(skb)->nr_frags; unsigned int fragments = skb_shinfo(skb)->nr_frags;
unsigned int nr = (np->next_tx - 1) % np->tx_ring_size;
unsigned int start_nr = np->next_tx % np->tx_ring_size;
unsigned int i; unsigned int i;
u32 offset = 0; u32 offset = 0;
u32 bcnt; u32 bcnt;
u32 size = skb->len-skb->data_len; u32 size = skb->len-skb->data_len;
u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
u32 empty_slots;
u32 tx_flags_vlan = 0; u32 tx_flags_vlan = 0;
union ring_type put_tx;
union ring_type start_tx;
union ring_type prev_tx;
struct nv_skb_map* prev_tx_ctx;
/* add fragments to entries count */ /* add fragments to entries count */
for (i = 0; i < fragments; i++) { for (i = 0; i < fragments; i++) {
@ -1512,32 +1571,46 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
spin_lock_irq(&np->lock); spin_lock_irq(&np->lock);
if ((np->next_tx - np->nic_tx + entries - 1) > np->tx_limit_stop) { empty_slots = nv_get_empty_tx_slots(np);
if ((empty_slots - np->tx_limit_stop) <= entries) {
spin_unlock_irq(&np->lock); spin_unlock_irq(&np->lock);
netif_stop_queue(dev); netif_stop_queue(dev);
return NETDEV_TX_BUSY; return NETDEV_TX_BUSY;
} }
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
start_tx.orig = put_tx.orig = np->put_tx.orig;
else
start_tx.ex = put_tx.ex = np->put_tx.ex;
/* setup the header buffer */ /* setup the header buffer */
do { do {
prev_tx = put_tx;
prev_tx_ctx = np->put_tx_ctx;
bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
nr = (nr + 1) % np->tx_ring_size; np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
PCI_DMA_TODEVICE); PCI_DMA_TODEVICE);
np->tx_dma_len[nr] = bcnt; np->put_tx_ctx->dma_len = bcnt;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->tx_ring.orig[nr].buf = cpu_to_le32(np->tx_dma[nr]); put_tx.orig->buf = cpu_to_le32(np->put_tx_ctx->dma);
np->tx_ring.orig[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags); put_tx.orig->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
} else { } else {
np->tx_ring.ex[nr].bufhigh = cpu_to_le64(np->tx_dma[nr]) >> 32; put_tx.ex->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
np->tx_ring.ex[nr].buflow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF; put_tx.ex->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
np->tx_ring.ex[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags); put_tx.ex->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
} }
tx_flags = np->tx_flags; tx_flags = np->tx_flags;
offset += bcnt; offset += bcnt;
size -= bcnt; size -= bcnt;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (put_tx.orig++ == np->last_tx.orig)
put_tx.orig = np->first_tx.orig;
} else {
if (put_tx.ex++ == np->last_tx.ex)
put_tx.ex = np->first_tx.ex;
}
if (np->put_tx_ctx++ == np->last_tx_ctx)
np->put_tx_ctx = np->first_tx_ctx;
} while (size); } while (size);
/* setup the fragments */ /* setup the fragments */
@ -1547,34 +1620,43 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
offset = 0; offset = 0;
do { do {
prev_tx = put_tx;
prev_tx_ctx = np->put_tx_ctx;
bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
nr = (nr + 1) % np->tx_ring_size; np->put_tx_ctx->dma = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
PCI_DMA_TODEVICE);
np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt, np->put_tx_ctx->dma_len = bcnt;
PCI_DMA_TODEVICE);
np->tx_dma_len[nr] = bcnt;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->tx_ring.orig[nr].buf = cpu_to_le32(np->tx_dma[nr]); put_tx.orig->buf = cpu_to_le32(np->put_tx_ctx->dma);
np->tx_ring.orig[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags); put_tx.orig->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
} else { } else {
np->tx_ring.ex[nr].bufhigh = cpu_to_le64(np->tx_dma[nr]) >> 32; put_tx.ex->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
np->tx_ring.ex[nr].buflow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF; put_tx.ex->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
np->tx_ring.ex[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags); put_tx.ex->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
} }
offset += bcnt; offset += bcnt;
size -= bcnt; size -= bcnt;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (put_tx.orig++ == np->last_tx.orig)
put_tx.orig = np->first_tx.orig;
} else {
if (put_tx.ex++ == np->last_tx.ex)
put_tx.ex = np->first_tx.ex;
}
if (np->put_tx_ctx++ == np->last_tx_ctx)
np->put_tx_ctx = np->first_tx_ctx;
} while (size); } while (size);
} }
/* set last fragment flag */ /* set last fragment flag */
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
np->tx_ring.orig[nr].flaglen |= cpu_to_le32(tx_flags_extra); prev_tx.orig->flaglen |= cpu_to_le32(tx_flags_extra);
} else { else
np->tx_ring.ex[nr].flaglen |= cpu_to_le32(tx_flags_extra); prev_tx.ex->flaglen |= cpu_to_le32(tx_flags_extra);
}
np->tx_skbuff[nr] = skb; /* save skb in this slot's context area */
prev_tx_ctx->skb = skb;
if (skb_is_gso(skb)) if (skb_is_gso(skb))
tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
@ -1589,14 +1671,17 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* set tx flags */ /* set tx flags */
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->tx_ring.orig[start_nr].flaglen |= cpu_to_le32(tx_flags | tx_flags_extra); start_tx.orig->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
np->put_tx.orig = put_tx.orig;
} else { } else {
np->tx_ring.ex[start_nr].txvlan = cpu_to_le32(tx_flags_vlan); start_tx.ex->txvlan = cpu_to_le32(tx_flags_vlan);
np->tx_ring.ex[start_nr].flaglen |= cpu_to_le32(tx_flags | tx_flags_extra); start_tx.ex->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
np->put_tx.ex = put_tx.ex;
} }
dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d (entries %d) queued for transmission. tx_flags_extra: %x\n",
dev->name, np->next_tx, entries, tx_flags_extra); dprintk(KERN_DEBUG "%s: nv_start_xmit: entries %d queued for transmission. tx_flags_extra: %x\n",
dev->name, entries, tx_flags_extra);
{ {
int j; int j;
for (j=0; j<64; j++) { for (j=0; j<64; j++) {
@ -1607,8 +1692,6 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
dprintk("\n"); dprintk("\n");
} }
np->next_tx += entries;
dev->trans_start = jiffies; dev->trans_start = jiffies;
spin_unlock_irq(&np->lock); spin_unlock_irq(&np->lock);
writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl); writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
@ -1625,24 +1708,26 @@ static void nv_tx_done(struct net_device *dev)
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
u32 flags; u32 flags;
unsigned int i;
struct sk_buff *skb; struct sk_buff *skb;
while (np->nic_tx != np->next_tx) { while (1) {
i = np->nic_tx % np->tx_ring_size; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (np->get_tx.orig == np->put_tx.orig)
break;
flags = le32_to_cpu(np->get_tx.orig->flaglen);
} else {
if (np->get_tx.ex == np->put_tx.ex)
break;
flags = le32_to_cpu(np->get_tx.ex->flaglen);
}
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) dprintk(KERN_DEBUG "%s: nv_tx_done: flags 0x%x.\n",
flags = le32_to_cpu(np->tx_ring.orig[i].flaglen); dev->name, flags);
else
flags = le32_to_cpu(np->tx_ring.ex[i].flaglen);
dprintk(KERN_DEBUG "%s: nv_tx_done: looking at packet %d, flags 0x%x.\n",
dev->name, np->nic_tx, flags);
if (flags & NV_TX_VALID) if (flags & NV_TX_VALID)
break; break;
if (np->desc_ver == DESC_VER_1) { if (np->desc_ver == DESC_VER_1) {
if (flags & NV_TX_LASTPACKET) { if (flags & NV_TX_LASTPACKET) {
skb = np->tx_skbuff[i]; skb = np->get_tx_ctx->skb;
if (flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION| if (flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
NV_TX_UNDERFLOW|NV_TX_ERROR)) { NV_TX_UNDERFLOW|NV_TX_ERROR)) {
if (flags & NV_TX_UNDERFLOW) if (flags & NV_TX_UNDERFLOW)
@ -1657,7 +1742,7 @@ static void nv_tx_done(struct net_device *dev)
} }
} else { } else {
if (flags & NV_TX2_LASTPACKET) { if (flags & NV_TX2_LASTPACKET) {
skb = np->tx_skbuff[i]; skb = np->get_tx_ctx->skb;
if (flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION| if (flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
NV_TX2_UNDERFLOW|NV_TX2_ERROR)) { NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
if (flags & NV_TX2_UNDERFLOW) if (flags & NV_TX2_UNDERFLOW)
@ -1671,10 +1756,18 @@ static void nv_tx_done(struct net_device *dev)
} }
} }
} }
nv_release_txskb(dev, i); nv_release_txskb(dev, np->get_tx_ctx);
np->nic_tx++; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (np->get_tx.orig++ == np->last_tx.orig)
np->get_tx.orig = np->first_tx.orig;
} else {
if (np->get_tx.ex++ == np->last_tx.ex)
np->get_tx.ex = np->first_tx.ex;
}
if (np->get_tx_ctx++ == np->last_tx_ctx)
np->get_tx_ctx = np->first_tx_ctx;
} }
if (np->next_tx - np->nic_tx < np->tx_limit_start) if (nv_get_empty_tx_slots(np) > np->tx_limit_start)
netif_wake_queue(dev); netif_wake_queue(dev);
} }
@ -1698,9 +1791,8 @@ static void nv_tx_timeout(struct net_device *dev)
{ {
int i; int i;
printk(KERN_INFO "%s: Ring at %lx: next %d nic %d\n", printk(KERN_INFO "%s: Ring at %lx\n",
dev->name, (unsigned long)np->ring_addr, dev->name, (unsigned long)np->ring_addr);
np->next_tx, np->nic_tx);
printk(KERN_INFO "%s: Dumping tx registers\n", dev->name); printk(KERN_INFO "%s: Dumping tx registers\n", dev->name);
for (i=0;i<=np->register_size;i+= 32) { for (i=0;i<=np->register_size;i+= 32) {
printk(KERN_INFO "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n", printk(KERN_INFO "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n",
@ -1751,10 +1843,10 @@ static void nv_tx_timeout(struct net_device *dev)
nv_tx_done(dev); nv_tx_done(dev);
/* 3) if there are dead entries: clear everything */ /* 3) if there are dead entries: clear everything */
if (np->next_tx != np->nic_tx) { if (np->get_tx_ctx != np->put_tx_ctx) {
printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name); printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name);
nv_drain_tx(dev); nv_drain_tx(dev);
np->next_tx = np->nic_tx = 0; nv_init_tx(dev);
setup_hw_rings(dev, NV_SETUP_TX_RING); setup_hw_rings(dev, NV_SETUP_TX_RING);
netif_wake_queue(dev); netif_wake_queue(dev);
} }
@ -1827,22 +1919,22 @@ static int nv_rx_process(struct net_device *dev, int limit)
for (count = 0; count < limit; ++count) { for (count = 0; count < limit; ++count) {
struct sk_buff *skb; struct sk_buff *skb;
int len; int len;
int i;
if (np->cur_rx - np->refill_rx >= np->rx_ring_size)
break; /* we scanned the whole ring - do not continue */
i = np->cur_rx % np->rx_ring_size;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
flags = le32_to_cpu(np->rx_ring.orig[i].flaglen); if (np->get_rx.orig == np->put_rx.orig)
len = nv_descr_getlength(&np->rx_ring.orig[i], np->desc_ver); break; /* we scanned the whole ring - do not continue */
flags = le32_to_cpu(np->get_rx.orig->flaglen);
len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);
} else { } else {
flags = le32_to_cpu(np->rx_ring.ex[i].flaglen); if (np->get_rx.ex == np->put_rx.ex)
len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver); break; /* we scanned the whole ring - do not continue */
vlanflags = le32_to_cpu(np->rx_ring.ex[i].buflow); flags = le32_to_cpu(np->get_rx.ex->flaglen);
len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
} }
dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, flags 0x%x.\n", dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
dev->name, np->cur_rx, flags); dev->name, flags);
if (flags & NV_RX_AVAIL) if (flags & NV_RX_AVAIL)
break; /* still owned by hardware, */ break; /* still owned by hardware, */
@ -1852,8 +1944,8 @@ static int nv_rx_process(struct net_device *dev, int limit)
* TODO: check if a prefetch of the first cacheline improves * TODO: check if a prefetch of the first cacheline improves
* the performance. * the performance.
*/ */
pci_unmap_single(np->pci_dev, np->rx_dma[i], pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
np->rx_skbuff[i]->end-np->rx_skbuff[i]->data, np->get_rx_ctx->dma_len,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
{ {
@ -1862,7 +1954,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
for (j=0; j<64; j++) { for (j=0; j<64; j++) {
if ((j%16) == 0) if ((j%16) == 0)
dprintk("\n%03x:", j); dprintk("\n%03x:", j);
dprintk(" %02x", ((unsigned char*)np->rx_skbuff[i]->data)[j]); dprintk(" %02x", ((unsigned char*)np->get_rx_ctx->skb->data)[j]);
} }
dprintk("\n"); dprintk("\n");
} }
@ -1892,7 +1984,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
goto next_pkt; goto next_pkt;
} }
if (flags & NV_RX_ERROR4) { if (flags & NV_RX_ERROR4) {
len = nv_getlen(dev, np->rx_skbuff[i]->data, len); len = nv_getlen(dev, np->get_rx_ctx->skb->data, len);
if (len < 0) { if (len < 0) {
np->stats.rx_errors++; np->stats.rx_errors++;
goto next_pkt; goto next_pkt;
@ -1925,7 +2017,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
goto next_pkt; goto next_pkt;
} }
if (flags & NV_RX2_ERROR4) { if (flags & NV_RX2_ERROR4) {
len = nv_getlen(dev, np->rx_skbuff[i]->data, len); len = nv_getlen(dev, np->get_rx_ctx->skb->data, len);
if (len < 0) { if (len < 0) {
np->stats.rx_errors++; np->stats.rx_errors++;
goto next_pkt; goto next_pkt;
@ -1944,20 +2036,20 @@ static int nv_rx_process(struct net_device *dev, int limit)
flags == NV_RX2_CHECKSUMOK2 || flags == NV_RX2_CHECKSUMOK2 ||
flags == NV_RX2_CHECKSUMOK3) { flags == NV_RX2_CHECKSUMOK3) {
dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name); dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
np->rx_skbuff[i]->ip_summed = CHECKSUM_UNNECESSARY; np->get_rx_ctx->skb->ip_summed = CHECKSUM_UNNECESSARY;
} else { } else {
dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name); dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
} }
} }
} }
/* got a valid packet - forward it to the network core */ /* got a valid packet - forward it to the network core */
skb = np->rx_skbuff[i]; skb = np->get_rx_ctx->skb;
np->rx_skbuff[i] = NULL; np->get_rx_ctx->skb = NULL;
skb_put(skb, len); skb_put(skb, len);
skb->protocol = eth_type_trans(skb, dev); skb->protocol = eth_type_trans(skb, dev);
dprintk(KERN_DEBUG "%s: nv_rx_process: packet %d with %d bytes, proto %d accepted.\n", dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
dev->name, np->cur_rx, len, skb->protocol); dev->name, len, skb->protocol);
#ifdef CONFIG_FORCEDETH_NAPI #ifdef CONFIG_FORCEDETH_NAPI
if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
vlan_hwaccel_receive_skb(skb, np->vlangrp, vlan_hwaccel_receive_skb(skb, np->vlangrp,
@ -1975,7 +2067,15 @@ static int nv_rx_process(struct net_device *dev, int limit)
np->stats.rx_packets++; np->stats.rx_packets++;
np->stats.rx_bytes += len; np->stats.rx_bytes += len;
next_pkt: next_pkt:
np->cur_rx++; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (np->get_rx.orig++ == np->last_rx.orig)
np->get_rx.orig = np->first_rx.orig;
} else {
if (np->get_rx.ex++ == np->last_rx.ex)
np->get_rx.ex = np->first_rx.ex;
}
if (np->get_rx_ctx++ == np->last_rx_ctx)
np->get_rx_ctx = np->first_rx_ctx;
} }
return count; return count;
@ -3463,7 +3563,7 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
{ {
struct fe_priv *np = netdev_priv(dev); struct fe_priv *np = netdev_priv(dev);
u8 __iomem *base = get_hwbase(dev); u8 __iomem *base = get_hwbase(dev);
u8 *rxtx_ring, *rx_skbuff, *tx_skbuff, *rx_dma, *tx_dma, *tx_dma_len; u8 *rxtx_ring, *rx_skbuff, *tx_skbuff;
dma_addr_t ring_addr; dma_addr_t ring_addr;
if (ring->rx_pending < RX_RING_MIN || if (ring->rx_pending < RX_RING_MIN ||
@ -3489,12 +3589,9 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending), sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending),
&ring_addr); &ring_addr);
} }
rx_skbuff = kmalloc(sizeof(struct sk_buff*) * ring->rx_pending, GFP_KERNEL); rx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->rx_pending, GFP_KERNEL);
rx_dma = kmalloc(sizeof(dma_addr_t) * ring->rx_pending, GFP_KERNEL); tx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->tx_pending, GFP_KERNEL);
tx_skbuff = kmalloc(sizeof(struct sk_buff*) * ring->tx_pending, GFP_KERNEL); if (!rxtx_ring || !rx_skbuff || !tx_skbuff) {
tx_dma = kmalloc(sizeof(dma_addr_t) * ring->tx_pending, GFP_KERNEL);
tx_dma_len = kmalloc(sizeof(unsigned int) * ring->tx_pending, GFP_KERNEL);
if (!rxtx_ring || !rx_skbuff || !rx_dma || !tx_skbuff || !tx_dma || !tx_dma_len) {
/* fall back to old rings */ /* fall back to old rings */
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
if (rxtx_ring) if (rxtx_ring)
@ -3507,14 +3604,8 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
} }
if (rx_skbuff) if (rx_skbuff)
kfree(rx_skbuff); kfree(rx_skbuff);
if (rx_dma)
kfree(rx_dma);
if (tx_skbuff) if (tx_skbuff)
kfree(tx_skbuff); kfree(tx_skbuff);
if (tx_dma)
kfree(tx_dma);
if (tx_dma_len)
kfree(tx_dma_len);
goto exit; goto exit;
} }
@ -3536,8 +3627,8 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
/* set new values */ /* set new values */
np->rx_ring_size = ring->rx_pending; np->rx_ring_size = ring->rx_pending;
np->tx_ring_size = ring->tx_pending; np->tx_ring_size = ring->tx_pending;
np->tx_limit_stop = ring->tx_pending - TX_LIMIT_DIFFERENCE; np->tx_limit_stop = TX_LIMIT_DIFFERENCE;
np->tx_limit_start = ring->tx_pending - TX_LIMIT_DIFFERENCE - 1; np->tx_limit_start = TX_LIMIT_DIFFERENCE;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->rx_ring.orig = (struct ring_desc*)rxtx_ring; np->rx_ring.orig = (struct ring_desc*)rxtx_ring;
np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size]; np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size];
@ -3545,18 +3636,12 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
np->rx_ring.ex = (struct ring_desc_ex*)rxtx_ring; np->rx_ring.ex = (struct ring_desc_ex*)rxtx_ring;
np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size]; np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
} }
np->rx_skbuff = (struct sk_buff**)rx_skbuff; np->rx_skb = (struct nv_skb_map*)rx_skbuff;
np->rx_dma = (dma_addr_t*)rx_dma; np->tx_skb = (struct nv_skb_map*)tx_skbuff;
np->tx_skbuff = (struct sk_buff**)tx_skbuff;
np->tx_dma = (dma_addr_t*)tx_dma;
np->tx_dma_len = (unsigned int*)tx_dma_len;
np->ring_addr = ring_addr; np->ring_addr = ring_addr;
memset(np->rx_skbuff, 0, sizeof(struct sk_buff*) * np->rx_ring_size); memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size);
memset(np->rx_dma, 0, sizeof(dma_addr_t) * np->rx_ring_size); memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size);
memset(np->tx_skbuff, 0, sizeof(struct sk_buff*) * np->tx_ring_size);
memset(np->tx_dma, 0, sizeof(dma_addr_t) * np->tx_ring_size);
memset(np->tx_dma_len, 0, sizeof(unsigned int) * np->tx_ring_size);
if (netif_running(dev)) { if (netif_running(dev)) {
/* reinit driver view of the queues */ /* reinit driver view of the queues */
@ -3953,7 +4038,7 @@ static int nv_loopback_test(struct net_device *dev)
dprintk(KERN_DEBUG "%s: loopback len mismatch %d vs %d\n", dprintk(KERN_DEBUG "%s: loopback len mismatch %d vs %d\n",
dev->name, len, pkt_len); dev->name, len, pkt_len);
} else { } else {
rx_skb = np->rx_skbuff[0]; rx_skb = np->rx_skb[0].skb;
for (i = 0; i < pkt_len; i++) { for (i = 0; i < pkt_len; i++) {
if (rx_skb->data[i] != (u8)(i & 0xff)) { if (rx_skb->data[i] != (u8)(i & 0xff)) {
ret = 0; ret = 0;
@ -4508,8 +4593,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
np->rx_ring_size = RX_RING_DEFAULT; np->rx_ring_size = RX_RING_DEFAULT;
np->tx_ring_size = TX_RING_DEFAULT; np->tx_ring_size = TX_RING_DEFAULT;
np->tx_limit_stop = np->tx_ring_size - TX_LIMIT_DIFFERENCE; np->tx_limit_stop = TX_LIMIT_DIFFERENCE;
np->tx_limit_start = np->tx_ring_size - TX_LIMIT_DIFFERENCE - 1; np->tx_limit_start = TX_LIMIT_DIFFERENCE;
if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
np->rx_ring.orig = pci_alloc_consistent(pci_dev, np->rx_ring.orig = pci_alloc_consistent(pci_dev,
@ -4526,18 +4611,12 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
goto out_unmap; goto out_unmap;
np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size]; np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
} }
np->rx_skbuff = kmalloc(sizeof(struct sk_buff*) * np->rx_ring_size, GFP_KERNEL); np->rx_skb = kmalloc(sizeof(struct nv_skb_map) * np->rx_ring_size, GFP_KERNEL);
np->rx_dma = kmalloc(sizeof(dma_addr_t) * np->rx_ring_size, GFP_KERNEL); np->tx_skb = kmalloc(sizeof(struct nv_skb_map) * np->tx_ring_size, GFP_KERNEL);
np->tx_skbuff = kmalloc(sizeof(struct sk_buff*) * np->tx_ring_size, GFP_KERNEL); if (!np->rx_skb || !np->tx_skb)
np->tx_dma = kmalloc(sizeof(dma_addr_t) * np->tx_ring_size, GFP_KERNEL);
np->tx_dma_len = kmalloc(sizeof(unsigned int) * np->tx_ring_size, GFP_KERNEL);
if (!np->rx_skbuff || !np->rx_dma || !np->tx_skbuff || !np->tx_dma || !np->tx_dma_len)
goto out_freering; goto out_freering;
memset(np->rx_skbuff, 0, sizeof(struct sk_buff*) * np->rx_ring_size); memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size);
memset(np->rx_dma, 0, sizeof(dma_addr_t) * np->rx_ring_size); memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size);
memset(np->tx_skbuff, 0, sizeof(struct sk_buff*) * np->tx_ring_size);
memset(np->tx_dma, 0, sizeof(dma_addr_t) * np->tx_ring_size);
memset(np->tx_dma_len, 0, sizeof(unsigned int) * np->tx_ring_size);
dev->open = nv_open; dev->open = nv_open;
dev->stop = nv_close; dev->stop = nv_close;