xdp: fix bug in cpumap teardown code path [Linux 4.18]

This Linux kernel change "xdp: fix bug in cpumap teardown code path" is included in the Linux 4.18 release. The change was authored by Jesper Dangaard Brouer <brouer@redhat.com> on Wed Aug 8 23:00:34 2018 +0200. The commit for this change in the Linux stable tree is ad0ab02.

xdp: fix bug in cpumap teardown code path

When removing a cpumap entry, a number of synchronization steps happen.
Eventually the teardown code __cpu_map_entry_free is invoked from/via
call_rcu.
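
For context, the deferred-free flow looks roughly like the sketch below. This is a simplified rendering of the pattern in kernel/bpf/cpumap.c, not the verbatim source; error handling and most struct fields are elided.

 static void __cpu_map_entry_free(struct rcu_head *rcu)
 {
     struct bpf_cpu_map_entry *rcpu;

     /* Recover the entry from its embedded rcu_head */
     rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);

     /* Flush leftover xdp_frames, stop the kthread, free memory.
      * Crucially, this runs from RCU callback context, not from a
      * driver's RX NAPI poll loop.
      */
 }

 static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
                                     u32 key_cpu,
                                     struct bpf_cpu_map_entry *rcpu)
 {
     struct bpf_cpu_map_entry *old_rcpu;

     old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
     if (old_rcpu)
         /* Defer the free until all RCU readers are done */
         call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
 }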

The teardown code __cpu_map_entry_free() flushes remaining xdp_frames,
by invoking bq_flush_to_queue, which calls xdp_return_frame_rx_napi().
The issue is that the teardown code is not running in the RX NAPI
code path.  Thus, it is not allowed to invoke the NAPI variant of
xdp_return_frame.
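
To see why the calling context matters: the two frame-return variants differ only in whether the page_pool backing the frame may recycle the page directly into a lockless per-CPU cache, which is only safe from the RX NAPI (softirq) context of the CPU that owns the pool. The sketch below simplifies the logic in net/core/xdp.c; mem_to_page_pool() is a hypothetical stand-in for the lookup the real code performs, and other memory types are elided.

 static void __xdp_return(void *data, struct xdp_mem_info *mem,
                          bool napi_direct)
 {
     struct page *page = virt_to_head_page(data);

     if (mem->type == MEM_TYPE_PAGE_POOL)
         /* napi_direct == true recycles into a lockless per-CPU
          * cache; only valid from RX NAPI (softirq) context.
          * mem_to_page_pool() stands in for the real lookup.
          */
         __page_pool_put_page(mem_to_page_pool(mem), page, napi_direct);
     else
         put_page(page);
 }

 void xdp_return_frame(struct xdp_frame *xdpf)
 {
     __xdp_return(xdpf->data, &xdpf->mem, false); /* safe in any context */
 }

 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
     __xdp_return(xdpf->data, &xdpf->mem, true);  /* RX NAPI context only */
 }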

This bug was found and triggered by using the --stress-mode option to
the samples/bpf program xdp_redirect_cpu.  It is hard to trigger
because the ptr_ring has to be full, the cpumap bulk queue holds at
most 8 packets, and a remote CPU must be racing to empty the ptr_ring
queue.

Fixes: 389ab7f01af9 ("xdp: introduce xdp_return_frame_rx_napi")
Tested-by: Jean-Tsung Hsiao <[email protected]>
Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>

This change adds or deletes 15 lines of Linux source code. The code changes to the Linux kernel are as follows.

 kernel/bpf/cpumap.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d1..46f5f29 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };

 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-                struct xdp_bulk_queue *bq);
+                struct xdp_bulk_queue *bq, bool in_napi_ctx);

 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
        struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);

        /* No concurrent bq_enqueue can run at this point */
-       bq_flush_to_queue(rcpu, bq);
+       bq_flush_to_queue(rcpu, bq, false);
    }
    free_percpu(rcpu->bulkq);
    /* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 };

 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-                struct xdp_bulk_queue *bq)
+                struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
    unsigned int processed = 0, drops = 0;
    const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
        err = __ptr_ring_produce(q, xdpf);
        if (err) {
            drops++;
-           xdp_return_frame_rx_napi(xdpf);
+           if (likely(in_napi_ctx))
+               xdp_return_frame_rx_napi(xdpf);
+           else
+               xdp_return_frame(xdpf);
        }
        processed++;
    }
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
    struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);

    if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-       bq_flush_to_queue(rcpu, bq);
+       bq_flush_to_queue(rcpu, bq, true);

    /* Notice, xdp_buff/page MUST be queued here, long enough for
     * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)

        /* Flush all frames in bulkq to real queue */
        bq = this_cpu_ptr(rcpu->bulkq);
-       bq_flush_to_queue(rcpu, bq);
+       bq_flush_to_queue(rcpu, bq, true);

        /* If already running, costs spin_lock_irqsave + smb_mb */
        wake_up_process(rcpu->kthread);
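
Distilled to its essence, the fix threads the caller's execution context down to the shared flush helper, which then picks the matching free routine. The small user-space illustration below shows that pattern in isolation; none of the names here are kernel APIs, they are invented for the example.

 #include <stdbool.h>
 #include <stdio.h>

 /* Stand-in for xdp_return_frame_rx_napi(): fast, context-restricted */
 static void return_frame_fast(int frame)
 {
     printf("frame %d: lockless per-CPU recycle (NAPI ctx only)\n", frame);
 }

 /* Stand-in for xdp_return_frame(): fully synchronized, safe anywhere */
 static void return_frame_safe(int frame)
 {
     printf("frame %d: fully synchronized return\n", frame);
 }

 /* Shared helper: the caller states its context via in_napi_ctx */
 static void flush_queue(const int *frames, int n, bool in_napi_ctx)
 {
     for (int i = 0; i < n; i++) {
         if (in_napi_ctx)
             return_frame_fast(frames[i]);
         else
             return_frame_safe(frames[i]);
     }
 }

 int main(void)
 {
     int frames[] = { 1, 2, 3 };

     flush_queue(frames, 3, true);  /* hot path: RX NAPI caller */
     flush_queue(frames, 3, false); /* teardown: RCU callback caller */
     return 0;
 }

Passing an explicit flag keeps the hot-path branch predictable, which is why the patch wraps it in likely(in_napi_ctx), rather than trying to detect the execution context from inside the helper.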
