Merge branch 'bpf-fix-cpu-and-devmap-teardown' [Linux 4.18]

This Linux kernel change "Merge branch 'bpf-fix-cpu-and-devmap-teardown'" is included in the Linux 4.18 release. This change was authored by Daniel Borkmann <daniel [at] iogearbox.net> on Thu Aug 9 21:50:45 2018 +0200. The commit for this change in the Linux stable tree is 9c95420 (patch). Other info about this change: Merge: bf9bae0 1bf9116

Merge branch 'bpf-fix-cpu-and-devmap-teardown'

Jesper Dangaard Brouer says:

====================
Removing entries from cpumap and devmap goes through a number of
synchronization steps to make sure no new xdp_frames can be enqueued.
But there is a small chance that xdp_frames remain which have not
been flushed/processed yet.  Flushing these during teardown happens
from RCU context, not, as usual, from RX NAPI context.

The optimization introduced in commit 389ab7f01af9 ("xdp: introduce
xdp_return_frame_rx_napi") missed that the flush operation can also
be called from RCU context.  Thus, we cannot always use the
xdp_return_frame_rx_napi call, which takes advantage of the protection
provided by XDP RX running under NAPI.

The samples/bpf xdp_redirect_cpu sample has a --stress-mode that is
adjusted to make the issue easier to reproduce (verified by Red Hat QA).
====================

Signed-off-by: Daniel Borkmann <[email protected]>
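
For readers skimming the patch below, the essence of the fix is a context-aware choice when dropping frames: only callers that really run under the RX NAPI softirq may use the faster xdp_return_frame_rx_napi(), while teardown paths must fall back to xdp_return_frame(). The following is a minimal, illustrative sketch of that pattern (drop_frames() is a hypothetical helper, not kernel code; the actual patch threads an in_napi_ctx flag into bq_xmit_all()):

    /* Illustrative sketch: free a batch of xdp_frames with the helper
     * that is safe for the calling context.
     */
    static void drop_frames(struct xdp_frame **frames, int n, bool in_napi_ctx)
    {
        int i;

        for (i = 0; i < n; i++) {
            if (in_napi_ctx)
                /* Fast path: only valid while running under RX NAPI */
                xdp_return_frame_rx_napi(frames[i]);
            else
                /* Safe from any context, e.g. an RCU callback at teardown */
                xdp_return_frame(frames[i]);
        }
    }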

There are 14 lines of Linux source code added/deleted in this change. Code changes to the Linux kernel are as follows.

 kernel/bpf/devmap.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1..750d45e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -217,7 +217,8 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }

 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-              struct xdp_bulk_queue *bq, u32 flags)
+              struct xdp_bulk_queue *bq, u32 flags,
+              bool in_napi_ctx)
 {
    struct net_device *dev = obj->dev;
    int sent = 0, drops = 0, err = 0;
@@ -254,7 +255,10 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
        struct xdp_frame *xdpf = bq->q[i];

        /* RX path under NAPI protection, can return frames faster */
-       xdp_return_frame_rx_napi(xdpf);
+       if (likely(in_napi_ctx))
+           xdp_return_frame_rx_napi(xdpf);
+       else
+           xdp_return_frame(xdpf);
        drops++;
    }
    goto out;
@@ -286,7 +290,7 @@ void __dev_map_flush(struct bpf_map *map)
        __clear_bit(bit, bitmap);

        bq = this_cpu_ptr(dev->bulkq);
-       bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+       bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true);
    }
 }

@@ -316,7 +320,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
    struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);

    if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-       bq_xmit_all(obj, bq, 0);
+       bq_xmit_all(obj, bq, 0, true);

    /* Ingress dev_rx will be the same for all xdp_frame's in
     * bulk_queue, because bq stored per-CPU and must be flushed
@@ -385,7 +389,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
            __clear_bit(dev->bit, bitmap);

            bq = per_cpu_ptr(dev->bulkq, cpu);
-           bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
+           bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false);
        }
    }
 }

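For context, the reason dev_map_flush_old() runs outside NAPI is that devmap entries are released through an RCU callback. The sketch below is a condensed approximation of the 4.18 devmap teardown path, not verbatim kernel source:

    /* Condensed sketch (assumed 4.18 layout): entries removed from the
     * devmap are freed via call_rcu(), so the final flush of any frames
     * still sitting in the per-CPU bulk queue happens in RCU callback
     * context rather than in the RX NAPI softirq.
     */
    static void __dev_map_entry_free(struct rcu_head *rcu)
    {
        struct bpf_dtab_netdev *dev;

        dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
        dev_map_flush_old(dev);   /* may drop leftover frames, no NAPI */
        free_percpu(dev->bulkq);
        dev_put(dev->dev);
        kfree(dev);
    }

This is why the patch passes in_napi_ctx = false from dev_map_flush_old(), and true from the __dev_map_flush() and bq_enqueue() paths that run under RX NAPI.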