tools/io_uring: sync with liburing [Linux 5.2]

tools/io_uring: sync with liburing [Linux 5.2]

The Linux kernel change "tools/io_uring: sync with liburing" is included in the Linux 5.2 release. It was authored by Jens Axboe <axboe [at] kernel.dk> on Wed May 22 08:59:12 2019 -0600. The commit for this change in the Linux stable tree is 004d564 (patch).

tools/io_uring: sync with liburing

Various fixes and changes have been applied to liburing since we
copied some select bits to the kernel testing/examples part, sync
up with liburing to get those changes.

Most notable is the change that split the CQE reading into the peek
and seen event, instead of being just a single function. Also fixes
an unsigned wrap issue in io_uring_submit(), leak of 'fd' in setup
if we fail, and various other little issues.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

This change adds or deletes 179 lines of Linux source code. The code changes to the Linux kernel are as follows.

 tools/io_uring/io_uring-cp.c | 21 ++++++++++-----
 tools/io_uring/liburing.h    | 64 +++++++++++++++++++++++++++++++++++---------
 tools/io_uring/queue.c       | 36 ++++++++++---------------
 tools/io_uring/setup.c       | 10 ++++---
 tools/io_uring/syscall.c     | 48 ++++++++++++++++++++-------------
 5 files changed, 118 insertions(+), 61 deletions(-)

diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c
index 633f65bb..8146181 100644
--- a/tools/io_uring/io_uring-cp.c
+++ b/tools/io_uring/io_uring-cp.c
@@ -13,6 +13,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <inttypes.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>

@@ -85,11 +86,16 @@ static int queue_read(struct io_uring *ring, off_t size, off_t offset)
    struct io_uring_sqe *sqe;
    struct io_data *data;

+   data = malloc(size + sizeof(*data));
+   if (!data)
+       return 1;
+
    sqe = io_uring_get_sqe(ring);
-   if (!sqe)
+   if (!sqe) {
+       free(data);
        return 1;
+   }

-   data = malloc(size + sizeof(*data));
    data->read = 1;
    data->offset = data->first_offset = offset;

@@ -166,22 +172,23 @@ static int copy_file(struct io_uring *ring, off_t insize)
            struct io_data *data;

            if (!got_comp) {
-               ret = io_uring_wait_completion(ring, &cqe);
+               ret = io_uring_wait_cqe(ring, &cqe);
                got_comp = 1;
            } else
-               ret = io_uring_get_completion(ring, &cqe);
+               ret = io_uring_peek_cqe(ring, &cqe);
            if (ret < 0) {
-               fprintf(stderr, "io_uring_get_completion: %s\n",
+               fprintf(stderr, "io_uring_peek_cqe: %s\n",
                            strerror(-ret));
                return 1;
            }
            if (!cqe)
                break;

-           data = (struct io_data *) (uintptr_t) cqe->user_data;
+           data = io_uring_cqe_get_data(cqe);
            if (cqe->res < 0) {
                if (cqe->res == -EAGAIN) {
                    queue_prepped(ring, data);
+                   io_uring_cqe_seen(ring, cqe);
                    continue;
                }
                fprintf(stderr, "cqe failed: %s\n",
@@ -193,6 +200,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
                data->iov.iov_len -= cqe->res;
                data->offset += cqe->res;
                queue_prepped(ring, data);
+               io_uring_cqe_seen(ring, cqe);
                continue;
            }

@@ -209,6 +217,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
                free(data);
                writes--;
            }
+           io_uring_cqe_seen(ring, cqe);
        }
    }

diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h
index cab0f50..5f305c8 100644
--- a/tools/io_uring/liburing.h
+++ b/tools/io_uring/liburing.h
@@ -1,10 +1,16 @@
 #ifndef LIB_URING_H
 #define LIB_URING_H

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include <sys/uio.h>
 #include <signal.h>
 #include <string.h>
 #include "../../include/uapi/linux/io_uring.h"
+#include <inttypes.h>
+#include "barrier.h"

 /*
  * Library interface to io_uring
@@ -46,7 +52,7 @@ struct io_uring {
  * System calls
  */
 extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
-extern int io_uring_enter(unsigned fd, unsigned to_submit,
+extern int io_uring_enter(int fd, unsigned to_submit,
    unsigned min_complete, unsigned flags, sigset_t *sig);
 extern int io_uring_register(int fd, unsigned int opcode, void *arg,
    unsigned int nr_args);
@@ -59,14 +65,33 @@ extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
 extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
    struct io_uring *ring);
 extern void io_uring_queue_exit(struct io_uring *ring);
-extern int io_uring_get_completion(struct io_uring *ring,
+extern int io_uring_peek_cqe(struct io_uring *ring,
    struct io_uring_cqe **cqe_ptr);
-extern int io_uring_wait_completion(struct io_uring *ring,
+extern int io_uring_wait_cqe(struct io_uring *ring,
    struct io_uring_cqe **cqe_ptr);
 extern int io_uring_submit(struct io_uring *ring);
 extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);

 /*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+                    struct io_uring_cqe *cqe)
+{
+   if (cqe) {
+       struct io_uring_cq *cq = &ring->cq;
+
+       (*cq->khead)++;
+       /*
+        * Ensure that the kernel sees our new head, the kernel has
+        * the matching read barrier.
+        */
+       write_barrier();
+   }
+}
+
+/*
  * Command prep helpers
  */
 static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
@@ -74,8 +99,14 @@ static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
    sqe->user_data = (unsigned long) data;
 }

+static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
+{
+   return (void *) (uintptr_t) cqe->user_data;
+}
+
 static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
-                   void *addr, unsigned len, off_t offset)
+                   const void *addr, unsigned len,
+                   off_t offset)
 {
    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode = op;
@@ -86,8 +117,8 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
 }

 static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
-                      struct iovec *iovecs, unsigned nr_vecs,
-                      off_t offset)
+                      const struct iovec *iovecs,
+                      unsigned nr_vecs, off_t offset)
 {
    io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
 }
@@ -100,14 +131,14 @@ static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
 }

 static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
-                       struct iovec *iovecs, unsigned nr_vecs,
-                   off_t offset)
+                   const struct iovec *iovecs,
+                   unsigned nr_vecs, off_t offset)
 {
    io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
 }

 static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
-                        void *buf, unsigned nbytes,
+                        const void *buf, unsigned nbytes,
                         off_t offset)
 {
    io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
@@ -131,13 +162,22 @@ static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
 }

 static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
-                      int datasync)
+                      unsigned fsync_flags)
 {
    memset(sqe, 0, sizeof(*sqe));
    sqe->opcode = IORING_OP_FSYNC;
    sqe->fd = fd;
-   if (datasync)
-       sqe->fsync_flags = IORING_FSYNC_DATASYNC;
+   sqe->fsync_flags = fsync_flags;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+   memset(sqe, 0, sizeof(*sqe));
+   sqe->opcode = IORING_OP_NOP;
+}
+
+#ifdef __cplusplus
 }
+#endif

 #endif
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
index 88505e8..321819c 100644
--- a/tools/io_uring/queue.c
+++ b/tools/io_uring/queue.c
@@ -8,8 +8,8 @@
 #include "liburing.h"
 #include "barrier.h"

-static int __io_uring_get_completion(struct io_uring *ring,
-                    struct io_uring_cqe **cqe_ptr, int wait)
+static int __io_uring_get_cqe(struct io_uring *ring,
+                 struct io_uring_cqe **cqe_ptr, int wait)
 {
    struct io_uring_cq *cq = &ring->cq;
    const unsigned mask = *cq->kring_mask;
@@ -39,34 +39,25 @@ static int __io_uring_get_completion(struct io_uring *ring,
            return -errno;
    } while (1);

-   if (*cqe_ptr) {
-       *cq->khead = head + 1;
-       /*
-        * Ensure that the kernel sees our new head, the kernel has
-        * the matching read barrier.
-        */
-       write_barrier();
-   }
-
    return 0;
 }

 /*
- * Return an IO completion, if one is readily available
+ * Return an IO completion, if one is readily available. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
  */
-int io_uring_get_completion(struct io_uring *ring,
-               struct io_uring_cqe **cqe_ptr)
+int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
 {
-   return __io_uring_get_completion(ring, cqe_ptr, 0);
+   return __io_uring_get_cqe(ring, cqe_ptr, 0);
 }

 /*
- * Return an IO completion, waiting for it if necessary
+ * Return an IO completion, waiting for it if necessary. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
  */
-int io_uring_wait_completion(struct io_uring *ring,
-                struct io_uring_cqe **cqe_ptr)
+int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
 {
-   return __io_uring_get_completion(ring, cqe_ptr, 1);
+   return __io_uring_get_cqe(ring, cqe_ptr, 1);
 }

 /*
@@ -78,7 +69,7 @@ int io_uring_submit(struct io_uring *ring)
 {
    struct io_uring_sq *sq = &ring->sq;
    const unsigned mask = *sq->kring_mask;
-   unsigned ktail, ktail_next, submitted;
+   unsigned ktail, ktail_next, submitted, to_submit;
    int ret;

    /*
@@ -100,7 +91,8 @@ int io_uring_submit(struct io_uring *ring)
     */
    submitted = 0;
    ktail = ktail_next = *sq->ktail;
-   while (sq->sqe_head < sq->sqe_tail) {
+   to_submit = sq->sqe_tail - sq->sqe_head;
+   while (to_submit--) {
        ktail_next++;
        read_barrier();

@@ -136,7 +128,7 @@ int io_uring_submit(struct io_uring *ring)
    if (ret < 0)
        return -errno;

-   return 0;
+   return ret;
 }

 /*
diff --git a/tools/io_uring/setup.c b/tools/io_uring/setup.c
index 4da19a7..0b50fcd 100644
--- a/tools/io_uring/setup.c
+++ b/tools/io_uring/setup.c
@@ -27,7 +27,7 @@ static int io_uring_mmap(int fd, struct io_uring_params *p,
    sq->kdropped = ptr + p->sq_off.dropped;
    sq->array = ptr + p->sq_off.array;

-   size = p->sq_entries * sizeof(struct io_uring_sqe),
+   size = p->sq_entries * sizeof(struct io_uring_sqe);
    sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_POPULATE, fd,
                IORING_OFF_SQES);
@@ -79,7 +79,7 @@ int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring
 int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
 {
    struct io_uring_params p;
-   int fd;
+   int fd, ret;

    memset(&p, 0, sizeof(p));
    p.flags = flags;
@@ -88,7 +88,11 @@ int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
    if (fd < 0)
        return fd;

-   return io_uring_queue_mmap(fd, &p, ring);
+   ret = io_uring_queue_mmap(fd, &p, ring);
+   if (ret)
+       close(fd);
+
+   return ret;
 }

 void io_uring_queue_exit(struct io_uring *ring)
diff --git a/tools/io_uring/syscall.c b/tools/io_uring/syscall.c
index 6b835e5..b22e0aa 100644
--- a/tools/io_uring/syscall.c
+++ b/tools/io_uring/syscall.c
@@ -7,34 +7,46 @@
 #include <signal.h>
 #include "liburing.h"

-#if defined(__x86_64) || defined(__i386__)
-#ifndef __NR_sys_io_uring_setup
-#define __NR_sys_io_uring_setup        425
-#endif
-#ifndef __NR_sys_io_uring_enter
-#define __NR_sys_io_uring_enter        426
-#endif
-#ifndef __NR_sys_io_uring_register
-#define __NR_sys_io_uring_register 427
-#endif
-#else
-#error "Arch not supported yet"
+#ifdef __alpha__
+/*
+ * alpha is the only exception, all other architectures
+ * have common numbers for new system calls.
+ */
+# ifndef __NR_io_uring_setup
+#  define __NR_io_uring_setup      535
+# endif
+# ifndef __NR_io_uring_enter
+#  define __NR_io_uring_enter      536
+# endif
+# ifndef __NR_io_uring_register
+#  define __NR_io_uring_register   537
+# endif
+#else /* !__alpha__ */
+# ifndef __NR_io_uring_setup
+#  define __NR_io_uring_setup      425
+# endif
+# ifndef __NR_io_uring_enter
+#  define __NR_io_uring_enter      426
+# endif
+# ifndef __NR_io_uring_register
+#  define __NR_io_uring_register   427
+# endif
 #endif

 int io_uring_register(int fd, unsigned int opcode, void *arg,
              unsigned int nr_args)
 {
-   return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args);
+   return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
 }

-int io_uring_setup(unsigned entries, struct io_uring_params *p)
+int io_uring_setup(unsigned int entries, struct io_uring_params *p)
 {
-   return syscall(__NR_sys_io_uring_setup, entries, p);
+   return syscall(__NR_io_uring_setup, entries, p);
 }

-int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete,
-          unsigned flags, sigset_t *sig)
+int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
+          unsigned int flags, sigset_t *sig)
 {
-   return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
+   return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
            flags, sig, _NSIG / 8);
 }

Leave a Reply

Your email address will not be published. Required fields are marked *