forked from luck/tmp_suning_uos_patched
IB/ipoib: Use NAPI in UD/TX flows
Instead of explicitly calling poll_cq on the TX ring, use the NAPI mechanism to handle the completion of each packet that has been sent to the HW. The major changes are: * The driver installs a completion function when it creates the send CQ; that function triggers the NAPI scheduling. * The driver uses a CQ for RX in both UD and CM modes, and a CQ for TX in both CM and UD modes. Cc: Kamal Heib <kamalh@mellanox.com> Signed-off-by: Erez Shitrit <erezsh@mellanox.com> Reviewed-by: Alex Vesker <valex@mellanox.com> Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
2c104ea683
commit
8966e28d2e
|
@ -331,7 +331,8 @@ struct ipoib_dev_priv {
|
|||
|
||||
struct net_device *dev;
|
||||
|
||||
struct napi_struct napi;
|
||||
struct napi_struct send_napi;
|
||||
struct napi_struct recv_napi;
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
|
@ -408,7 +409,6 @@ struct ipoib_dev_priv {
|
|||
#endif
|
||||
u64 hca_caps;
|
||||
struct ipoib_ethtool_st ethtool;
|
||||
struct timer_list poll_timer;
|
||||
unsigned max_send_sge;
|
||||
bool sm_fullmember_sendonly_support;
|
||||
const struct net_device_ops *rn_ops;
|
||||
|
@ -475,9 +475,10 @@ extern struct workqueue_struct *ipoib_workqueue;
|
|||
|
||||
/* functions */
|
||||
|
||||
int ipoib_poll(struct napi_struct *napi, int budget);
|
||||
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
|
||||
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
|
||||
int ipoib_rx_poll(struct napi_struct *napi, int budget);
|
||||
int ipoib_tx_poll(struct napi_struct *napi, int budget);
|
||||
void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr);
|
||||
void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr);
|
||||
|
||||
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
|
||||
struct ib_pd *pd, struct rdma_ah_attr *attr);
|
||||
|
|
|
@ -757,30 +757,35 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
|
|||
return;
|
||||
}
|
||||
|
||||
if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) {
|
||||
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
|
||||
tx->qp->qp_num);
|
||||
netif_stop_queue(dev);
|
||||
}
|
||||
|
||||
skb_orphan(skb);
|
||||
skb_dst_drop(skb);
|
||||
|
||||
if (netif_queue_stopped(dev))
|
||||
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
|
||||
IB_CQ_REPORT_MISSED_EVENTS)) {
|
||||
ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
|
||||
napi_schedule(&priv->send_napi);
|
||||
}
|
||||
|
||||
rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
|
||||
if (unlikely(rc)) {
|
||||
ipoib_warn(priv, "post_send failed, error %d\n", rc);
|
||||
ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc);
|
||||
++dev->stats.tx_errors;
|
||||
ipoib_dma_unmap_tx(priv, tx_req);
|
||||
dev_kfree_skb_any(skb);
|
||||
|
||||
if (netif_queue_stopped(dev))
|
||||
netif_wake_queue(dev);
|
||||
} else {
|
||||
netif_trans_update(dev);
|
||||
++tx->tx_head;
|
||||
++priv->tx_head;
|
||||
if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size) {
|
||||
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
|
||||
tx->qp->qp_num);
|
||||
netif_stop_queue(dev);
|
||||
rc = ib_req_notify_cq(priv->send_cq,
|
||||
IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
|
||||
if (rc < 0)
|
||||
ipoib_warn(priv, "request notify on send CQ failed\n");
|
||||
else if (rc)
|
||||
ipoib_send_comp_handler(priv->send_cq, dev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -815,9 +820,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
|
|||
|
||||
++tx->tx_tail;
|
||||
++priv->tx_tail;
|
||||
if (unlikely((priv->tx_head - priv->tx_tail) == ipoib_sendq_size >> 1) &&
|
||||
netif_queue_stopped(dev) &&
|
||||
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
|
||||
|
||||
if (unlikely(netif_queue_stopped(dev) &&
|
||||
(priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 &&
|
||||
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
|
||||
netif_wake_queue(dev);
|
||||
|
||||
if (wc->status != IB_WC_SUCCESS &&
|
||||
|
@ -1046,7 +1052,7 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
|
|||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
struct ib_qp_init_attr attr = {
|
||||
.send_cq = priv->recv_cq,
|
||||
.send_cq = priv->send_cq,
|
||||
.recv_cq = priv->recv_cq,
|
||||
.srq = priv->cm.srq,
|
||||
.cap.max_send_wr = ipoib_sendq_size,
|
||||
|
@ -1220,9 +1226,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
|
|||
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
|
||||
ipoib_dma_unmap_tx(priv, tx_req);
|
||||
dev_kfree_skb_any(tx_req->skb);
|
||||
netif_tx_lock_bh(p->dev);
|
||||
++p->tx_tail;
|
||||
++priv->tx_tail;
|
||||
netif_tx_lock_bh(p->dev);
|
||||
if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) &&
|
||||
netif_queue_stopped(p->dev) &&
|
||||
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
|
||||
|
|
|
@ -264,7 +264,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
|
|||
likely(wc->wc_flags & IB_WC_IP_CSUM_OK))
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
|
||||
napi_gro_receive(&priv->napi, skb);
|
||||
napi_gro_receive(&priv->recv_napi, skb);
|
||||
|
||||
repost:
|
||||
if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
|
||||
|
@ -406,9 +406,10 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
|
|||
dev_kfree_skb_any(tx_req->skb);
|
||||
|
||||
++priv->tx_tail;
|
||||
if (unlikely((priv->tx_head - priv->tx_tail) == ipoib_sendq_size >> 1) &&
|
||||
netif_queue_stopped(dev) &&
|
||||
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
|
||||
|
||||
if (unlikely(netif_queue_stopped(dev) &&
|
||||
((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) &&
|
||||
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
|
||||
netif_wake_queue(dev);
|
||||
|
||||
if (wc->status != IB_WC_SUCCESS &&
|
||||
|
@ -430,17 +431,23 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
|
|||
static int poll_tx(struct ipoib_dev_priv *priv)
|
||||
{
|
||||
int n, i;
|
||||
struct ib_wc *wc;
|
||||
|
||||
n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
|
||||
for (i = 0; i < n; ++i)
|
||||
ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
wc = priv->send_wc + i;
|
||||
if (wc->wr_id & IPOIB_OP_CM)
|
||||
ipoib_cm_handle_tx_wc(priv->dev, priv->send_wc + i);
|
||||
else
|
||||
ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
|
||||
}
|
||||
return n == MAX_SEND_CQE;
|
||||
}
|
||||
|
||||
int ipoib_poll(struct napi_struct *napi, int budget)
|
||||
int ipoib_rx_poll(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
|
||||
struct ipoib_dev_priv *priv =
|
||||
container_of(napi, struct ipoib_dev_priv, recv_napi);
|
||||
struct net_device *dev = priv->dev;
|
||||
int done;
|
||||
int t;
|
||||
|
@ -464,8 +471,9 @@ int ipoib_poll(struct napi_struct *napi, int budget)
|
|||
ipoib_cm_handle_rx_wc(dev, wc);
|
||||
else
|
||||
ipoib_ib_handle_rx_wc(dev, wc);
|
||||
} else
|
||||
ipoib_cm_handle_tx_wc(priv->dev, wc);
|
||||
} else {
|
||||
pr_warn("%s: Got unexpected wqe id\n", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
if (n != t)
|
||||
|
@ -484,33 +492,47 @@ int ipoib_poll(struct napi_struct *napi, int budget)
|
|||
return done;
|
||||
}
|
||||
|
||||
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
|
||||
int ipoib_tx_poll(struct napi_struct *napi, int budget)
|
||||
{
|
||||
struct net_device *dev = dev_ptr;
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv,
|
||||
send_napi);
|
||||
struct net_device *dev = priv->dev;
|
||||
int n, i;
|
||||
struct ib_wc *wc;
|
||||
|
||||
napi_schedule(&priv->napi);
|
||||
poll_more:
|
||||
n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
wc = priv->send_wc + i;
|
||||
if (wc->wr_id & IPOIB_OP_CM)
|
||||
ipoib_cm_handle_tx_wc(dev, wc);
|
||||
else
|
||||
ipoib_ib_handle_tx_wc(dev, wc);
|
||||
}
|
||||
|
||||
if (n < budget) {
|
||||
napi_complete(napi);
|
||||
if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
|
||||
IB_CQ_REPORT_MISSED_EVENTS)) &&
|
||||
napi_reschedule(napi))
|
||||
goto poll_more;
|
||||
}
|
||||
return n < 0 ? 0 : n;
|
||||
}
|
||||
|
||||
static void drain_tx_cq(struct net_device *dev)
|
||||
void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
struct ipoib_dev_priv *priv = ctx_ptr;
|
||||
|
||||
netif_tx_lock(dev);
|
||||
while (poll_tx(priv))
|
||||
; /* nothing */
|
||||
|
||||
if (netif_queue_stopped(dev))
|
||||
mod_timer(&priv->poll_timer, jiffies + 1);
|
||||
|
||||
netif_tx_unlock(dev);
|
||||
napi_schedule(&priv->recv_napi);
|
||||
}
|
||||
|
||||
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
|
||||
void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev_ptr);
|
||||
struct ipoib_dev_priv *priv = ctx_ptr;
|
||||
|
||||
mod_timer(&priv->poll_timer, jiffies);
|
||||
napi_schedule(&priv->send_napi);
|
||||
}
|
||||
|
||||
static inline int post_send(struct ipoib_dev_priv *priv,
|
||||
|
@ -614,14 +636,17 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
|
|||
/* increase the tx_head after send success, but use it for queue state */
|
||||
if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) {
|
||||
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
|
||||
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
|
||||
ipoib_warn(priv, "request notify on send CQ failed\n");
|
||||
netif_stop_queue(dev);
|
||||
}
|
||||
|
||||
skb_orphan(skb);
|
||||
skb_dst_drop(skb);
|
||||
|
||||
if (netif_queue_stopped(dev))
|
||||
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
|
||||
IB_CQ_REPORT_MISSED_EVENTS))
|
||||
ipoib_warn(priv, "request notify on send CQ failed\n");
|
||||
|
||||
rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
|
||||
address, dqpn, tx_req, phead, hlen);
|
||||
if (unlikely(rc)) {
|
||||
|
@ -638,11 +663,6 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
|
|||
rc = priv->tx_head;
|
||||
++priv->tx_head;
|
||||
}
|
||||
|
||||
if (unlikely(priv->tx_head - priv->tx_tail > MAX_SEND_CQE))
|
||||
while (poll_tx(priv))
|
||||
; /* nothing */
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -731,6 +751,22 @@ static void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
|
|||
new_state, qp_attr.qp_state);
|
||||
}
|
||||
|
||||
/* Enable both NAPI contexts of @dev: recv_napi first, then send_napi. */
static void ipoib_napi_enable(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
||||
napi_enable(&priv->recv_napi);
|
||||
napi_enable(&priv->send_napi);
|
||||
}
|
||||
|
||||
/* Disable both NAPI contexts of @dev: recv_napi first, then send_napi. */
static void ipoib_napi_disable(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
||||
napi_disable(&priv->recv_napi);
|
||||
napi_disable(&priv->send_napi);
|
||||
}
|
||||
|
||||
int ipoib_ib_dev_stop_default(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
@ -740,7 +776,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
|
|||
int i;
|
||||
|
||||
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
|
||||
napi_disable(&priv->napi);
|
||||
ipoib_napi_disable(dev);
|
||||
|
||||
ipoib_cm_dev_stop(dev);
|
||||
|
||||
|
@ -797,7 +833,6 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
|
|||
ipoib_dbg(priv, "All sends and receives done.\n");
|
||||
|
||||
timeout:
|
||||
del_timer_sync(&priv->poll_timer);
|
||||
qp_attr.qp_state = IB_QPS_RESET;
|
||||
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
|
||||
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
|
||||
|
@ -819,13 +854,6 @@ int ipoib_ib_dev_stop(struct net_device *dev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void ipoib_ib_tx_timer_func(struct timer_list *t)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = from_timer(priv, t, poll_timer);
|
||||
|
||||
drain_tx_cq(priv->dev);
|
||||
}
|
||||
|
||||
int ipoib_ib_dev_open_default(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
@ -850,7 +878,7 @@ int ipoib_ib_dev_open_default(struct net_device *dev)
|
|||
}
|
||||
|
||||
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
|
||||
napi_enable(&priv->napi);
|
||||
ipoib_napi_enable(dev);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
|
@ -965,8 +993,9 @@ void ipoib_drain_cq(struct net_device *dev)
|
|||
ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
|
||||
else
|
||||
ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
|
||||
} else
|
||||
ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
|
||||
} else {
|
||||
pr_warn("%s: Got unexpected wqe id\n", __func__);
|
||||
}
|
||||
}
|
||||
} while (n == IPOIB_NUM_WC);
|
||||
|
||||
|
|
|
@ -1616,13 +1616,29 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
|
|||
wait_for_completion(&priv->ntbl.deleted);
|
||||
}
|
||||
|
||||
/*
 * Register the two NAPI contexts of @dev: recv_napi is polled by
 * ipoib_rx_poll() with weight IPOIB_NUM_WC, and send_napi is polled by
 * ipoib_tx_poll() with weight MAX_SEND_CQE.
 */
static void ipoib_napi_add(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
||||
netif_napi_add(dev, &priv->recv_napi, ipoib_rx_poll, IPOIB_NUM_WC);
|
||||
netif_napi_add(dev, &priv->send_napi, ipoib_tx_poll, MAX_SEND_CQE);
|
||||
}
|
||||
|
||||
/* Unregister both NAPI contexts of @dev: recv_napi first, then send_napi. */
static void ipoib_napi_del(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
||||
netif_napi_del(&priv->recv_napi);
|
||||
netif_napi_del(&priv->send_napi);
|
||||
}
|
||||
|
||||
static void ipoib_dev_uninit_default(struct net_device *dev)
|
||||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
||||
ipoib_transport_dev_cleanup(dev);
|
||||
|
||||
netif_napi_del(&priv->napi);
|
||||
ipoib_napi_del(dev);
|
||||
|
||||
ipoib_cm_dev_cleanup(dev);
|
||||
|
||||
|
@ -1637,7 +1653,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
|
|||
{
|
||||
struct ipoib_dev_priv *priv = ipoib_priv(dev);
|
||||
|
||||
netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
|
||||
ipoib_napi_add(dev);
|
||||
|
||||
/* Allocate RX/TX "rings" to hold queued skbs */
|
||||
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
|
||||
|
@ -1665,8 +1681,6 @@ static int ipoib_dev_init_default(struct net_device *dev)
|
|||
priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
|
||||
priv->dev->dev_addr[3] = (priv->qp->qp_num) & 0xff;
|
||||
|
||||
timer_setup(&priv->poll_timer, ipoib_ib_tx_timer_func, 0);
|
||||
|
||||
return 0;
|
||||
|
||||
out_tx_ring_cleanup:
|
||||
|
@ -1676,7 +1690,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
|
|||
kfree(priv->rx_ring);
|
||||
|
||||
out:
|
||||
netif_napi_del(&priv->napi);
|
||||
ipoib_napi_del(dev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
|
|
@ -156,7 +156,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
|
|||
};
|
||||
struct ib_cq_init_attr cq_attr = {};
|
||||
|
||||
int ret, size;
|
||||
int ret, size, req_vec;
|
||||
int i;
|
||||
|
||||
size = ipoib_recvq_size + 1;
|
||||
|
@ -171,17 +171,21 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
|
|||
if (ret != -ENOSYS)
|
||||
return -ENODEV;
|
||||
|
||||
req_vec = (priv->port - 1) * 2;
|
||||
|
||||
cq_attr.cqe = size;
|
||||
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL,
|
||||
dev, &cq_attr);
|
||||
cq_attr.comp_vector = req_vec % priv->ca->num_comp_vectors;
|
||||
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL,
|
||||
priv, &cq_attr);
|
||||
if (IS_ERR(priv->recv_cq)) {
|
||||
printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
|
||||
goto out_cm_dev_cleanup;
|
||||
}
|
||||
|
||||
cq_attr.cqe = ipoib_sendq_size;
|
||||
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
|
||||
dev, &cq_attr);
|
||||
cq_attr.comp_vector = (req_vec + 1) % priv->ca->num_comp_vectors;
|
||||
priv->send_cq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL,
|
||||
priv, &cq_attr);
|
||||
if (IS_ERR(priv->send_cq)) {
|
||||
printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
|
||||
goto out_free_recv_cq;
|
||||
|
@ -208,6 +212,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
|
|||
goto out_free_send_cq;
|
||||
}
|
||||
|
||||
if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
|
||||
goto out_free_send_cq;
|
||||
|
||||
for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
|
||||
priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user