Merge branch ‘nic-thunderx-fix-communication-races-between-VF-PF’ [Linux 5.0]

This Linux kernel change "Merge branch ‘nic-thunderx-fix-communication-races-between-VF-PF’" is included in the Linux 5.0 release. This change is authored by David S. Miller <davem [at] davemloft.net> on Fri Feb 22 11:43:45 2019 -0800. The commit for this change in Linux stable tree is aaaf598 (patch). Other info about this change: Merge: efcc9bc 2e1c3ff

Merge branch 'nic-thunderx-fix-communication-races-between-VF-PF'

Vadim Lomovtsev says:

====================
nic: thunderx: fix communication races between VF & PF

The ThunderX CN88XX NIC Virtual Function driver uses mailbox interface
to communicate to physical function driver. Each of VF has it's own pair
of mailbox registers to read from and write to. The mailbox registers
has no protection from possible races, so it has to be implemented
at software side.

After long term testing by loop of 'ip link set <ifname> up/down'
command it was found that there are two possible scenarios when
race condition appears:
 1. VF receives link change message from PF and VF send RX mode
configuration message to PF in the same time from separate thread.
 2. PF receives RX mode configuration from VF and in the same time,
in separate thread PF detects link status change and sends appropriate
message to particular VF.

Both cases leads to mailbox data to be rewritten, NIC VF messaging control
data to be updated incorrectly and communication sequence gets broken.

This patch series is to address race condition with VF & PF communication.

Changes:
v1 -> v2
 - 0000: correct typo in cover letter subject: 'betwen' -> 'between';
 - move link state polling request task from pf to vf
   instead of cheking status of mailbox irq;
v2 -> v3
 - 0003: change return type of nicvf_send_cfg_done() function
   from int to void;
 - 0007: update subject and remove unused variable 'netdev'
   from nicvf_link_status_check_task() function;
====================

Signed-off-by: David S. Miller <[email protected]>

There is no are 0 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 drivers/net/ethernet/cavium/thunder/nic_main.c | 114 +++----------------------
 1 file changed, 12 insertions(+), 102 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 8ab71da..c902528 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -57,14 +57,8 @@ struct nicpf {
 #define    NIC_GET_BGX_FROM_VF_LMAC_MAP(map)   ((map >> 4) & 0xF)
 #define    NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)  (map & 0xF)
    u8          *vf_lmac_map;
-   struct delayed_work     dwork;
-   struct workqueue_struct *check_link;
-   u8          *link;
-   u8          *duplex;
-   u32         *speed;
    u16         cpi_base[MAX_NUM_VFS_SUPPORTED];
    u16         rssi_base[MAX_NUM_VFS_SUPPORTED];
-   bool            mbx_lock[MAX_NUM_VFS_SUPPORTED];

    /* MSI-X */
    u8          num_vec;
@@ -929,6 +923,10 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
    nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
 }

+/* Get BGX LMAC link status and update corresponding VF
+ * if there is a change, valid only if internal L2 switch
+ * is not present otherwise VF link is always treated as up
+ */
 static void nic_link_status_get(struct nicpf *nic, u8 vf)
 {
    union nic_mbx mbx = {};
@@ -944,10 +942,6 @@ static void nic_link_status_get(struct nicpf *nic, u8 vf)
    /* Get interface link status */
    bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);

-   nic->link[vf] = link.link_up;
-   nic->duplex[vf] = link.duplex;
-   nic->speed[vf] = link.speed;
-
    /* Send a mbox message to VF with current link status */
    mbx.link_status.link_up = link.link_up;
    mbx.link_status.duplex = link.duplex;
@@ -970,8 +964,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
    int i;
    int ret = 0;

-   nic->mbx_lock[vf] = true;
-
    mbx_addr = nic_get_mbx_addr(vf);
    mbx_data = (u64 *)&mbx;

@@ -986,12 +978,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
    switch (mbx.msg.msg) {
    case NIC_MBOX_MSG_READY:
        nic_mbx_send_ready(nic, vf);
-       if (vf < nic->num_vf_en) {
-           nic->link[vf] = 0;
-           nic->duplex[vf] = 0;
-           nic->speed[vf] = 0;
-       }
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_QS_CFG:
        reg_addr = NIC_PF_QSET_0_127_CFG |
               (mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -1060,7 +1047,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        break;
    case NIC_MBOX_MSG_RSS_SIZE:
        nic_send_rss_size(nic, vf);
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_RSS_CFG:
    case NIC_MBOX_MSG_RSS_CFG_CONT:
        nic_config_rss(nic, &mbx.rss_cfg);
@@ -1078,19 +1065,19 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        break;
    case NIC_MBOX_MSG_ALLOC_SQS:
        nic_alloc_sqs(nic, &mbx.sqs_alloc);
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_NICVF_PTR:
        nic->nicvf[vf] = mbx.nicvf.nicvf;
        break;
    case NIC_MBOX_MSG_PNICVF_PTR:
        nic_send_pnicvf(nic, vf);
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_SNICVF_PTR:
        nic_send_snicvf(nic, &mbx.nicvf);
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_BGX_STATS:
        nic_get_bgx_stats(nic, &mbx.bgx_stats);
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_LOOPBACK:
        ret = nic_config_loopback(nic, &mbx.lbk);
        break;
@@ -1099,7 +1086,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        break;
    case NIC_MBOX_MSG_PFC:
        nic_pause_frame(nic, vf, &mbx.pfc);
-       goto unlock;
+       return;
    case NIC_MBOX_MSG_PTP_CFG:
        nic_config_timestamp(nic, vf, &mbx.ptp);
        break;
@@ -1143,7 +1130,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
            break;
        }
        nic_link_status_get(nic, vf);
-       goto unlock;
+       return;
    default:
        dev_err(&nic->pdev->dev,
            "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1157,8 +1144,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
            mbx.msg.msg, vf);
        nic_mbx_send_nack(nic, vf);
    }
-unlock:
-   nic->mbx_lock[vf] = false;
 }

 static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
@@ -1306,52 +1291,6 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
    return 0;
 }

-/* Poll for BGX LMAC link status and update corresponding VF
- * if there is a change, valid only if internal L2 switch
- * is not present otherwise VF link is always treated as up
- */
-static void nic_poll_for_link(struct work_struct *work)
-{
-   union nic_mbx mbx = {};
-   struct nicpf *nic;
-   struct bgx_link_status link;
-   u8 vf, bgx, lmac;
-
-   nic = container_of(work, struct nicpf, dwork.work);
-
-   mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
-
-   for (vf = 0; vf < nic->num_vf_en; vf++) {
-       /* Poll only if VF is UP */
-       if (!nic->vf_enabled[vf])
-           continue;
-
-       /* Get BGX, LMAC indices for the VF */
-       bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-       lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-       /* Get interface link status */
-       bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
-
-       /* Inform VF only if link status changed */
-       if (nic->link[vf] == link.link_up)
-           continue;
-
-       if (!nic->mbx_lock[vf]) {
-           nic->link[vf] = link.link_up;
-           nic->duplex[vf] = link.duplex;
-           nic->speed[vf] = link.speed;
-
-           /* Send a mbox message to VF with current link status */
-           mbx.link_status.link_up = link.link_up;
-           mbx.link_status.duplex = link.duplex;
-           mbx.link_status.speed = link.speed;
-           mbx.link_status.mac_type = link.mac_type;
-           nic_send_msg_to_vf(nic, vf, &mbx);
-       }
-   }
-   queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
-}
-
 static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
    struct device *dev = &pdev->dev;
@@ -1420,18 +1359,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    if (!nic->vf_lmac_map)
        goto err_release_regions;

-   nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
-   if (!nic->link)
-       goto err_release_regions;
-
-   nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
-   if (!nic->duplex)
-       goto err_release_regions;
-
-   nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
-   if (!nic->speed)
-       goto err_release_regions;
-
    /* Initialize hardware */
    nic_init_hw(nic);

@@ -1447,19 +1374,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    if (err)
        goto err_unregister_interrupts;

-   /* Register a physical link status poll fn() */
-   nic->check_link = alloc_workqueue("check_link_status",
-                     WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
-   if (!nic->check_link) {
-       err = -ENOMEM;
-       goto err_disable_sriov;
-   }
-
    return 0;

-err_disable_sriov:
-   if (nic->flags & NIC_SRIOV_ENABLED)
-       pci_disable_sriov(pdev);
 err_unregister_interrupts:
    nic_unregister_interrupts(nic);
 err_release_regions:
@@ -1480,12 +1396,6 @@ static void nic_remove(struct pci_dev *pdev)
    if (nic->flags & NIC_SRIOV_ENABLED)
        pci_disable_sriov(pdev);

-   if (nic->check_link) {
-       /* Destroy work Queue */
-       cancel_delayed_work_sync(&nic->dwork);
-       destroy_workqueue(nic->check_link);
-   }
-
    nic_unregister_interrupts(nic);
    pci_release_regions(pdev);

Leave a Reply

Your email address will not be published. Required fields are marked *