RDMA/ucontext: Fix regression with disassociate [Linux 5.1]

RDMA/ucontext: Fix regression with disassociate [Linux 5.1]

This Linux kernel change "RDMA/ucontext: Fix regression with disassociate" is included in the Linux 5.1 release. This change is authored by Jason Gunthorpe <jgg@mellanox.com> on Tue Apr 16 14:07:28 2019 +0300. The commit for this change in the Linux stable tree is 67f269b (patch).

RDMA/ucontext: Fix regression with disassociate

When this code was consolidated the intention was that the VMA would
become backed by anonymous zero pages after the zap_vma_pte - however this
very subtly relied on setting the vm_ops = NULL and clearing the VM_SHARED
bits to transform the VMA into an anonymous VMA. Since the vm_ops was
removed this broke.

Now userspace gets a SIGBUS if it touches the vma after disassociation.

Instead of converting the VMA to anonymous provide a fault handler that
puts a zero'd page into the VMA when user-space touches it after
disassociation.

Cc: stable@vger.kernel.org
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Fixes: 5f9794dc94f5 ("RDMA/ucontext: Add a core API for mmaping driver IO memory")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

There are 53 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 drivers/infiniband/core/uverbs.h      |  1 +
 drivers/infiniband/core/uverbs_main.c | 52 +++++++++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index ea0bc68..32cc8fe 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -160,6 +160,7 @@ struct ib_uverbs_file {

    struct mutex umap_lock;
    struct list_head umaps;
+   struct page *disassociate_page;

    struct idr      idr;
    /* spinlock protects write access to idr */
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 70b7d80..db20b6e 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref)
        kref_put(&file->async_file->ref,
             ib_uverbs_release_async_event_file);
    put_device(&file->device->dev);
+
+   if (file->disassociate_page)
+       __free_pages(file->disassociate_page, 0);
    kfree(file);
 }

@@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma)
    kfree(priv);
 }

+/*
+ * Once zap_vma_ptes() has been called, touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+   struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+   struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+   vm_fault_t ret = 0;
+
+   /* priv is cleared only when the VMA was never set up; fault then. */
+   if (!priv)
+       return VM_FAULT_SIGBUS;
+
+   /* Read only pages can just use the system zero page. */
+   if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+       /*
+        * The faulting address is vmf->address; struct vm_fault has no
+        * vm_start member (that lives on the VMA).
+        */
+       vmf->page = ZERO_PAGE(vmf->address);
+       get_page(vmf->page);
+       return 0;
+   }
+
+   /* umap_lock serializes concurrent faulters creating the shared page. */
+   mutex_lock(&ufile->umap_lock);
+   if (!ufile->disassociate_page)
+       ufile->disassociate_page =
+           alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+   if (ufile->disassociate_page) {
+       /*
+        * This VMA is forced to always be shared so this doesn't have
+        * to worry about COW.
+        */
+       vmf->page = ufile->disassociate_page;
+       get_page(vmf->page);
+   } else {
+       ret = VM_FAULT_SIGBUS;
+   }
+   mutex_unlock(&ufile->umap_lock);
+
+   return ret;
+}
+
 static const struct vm_operations_struct rdma_umap_ops = {
    .open = rdma_umap_open,
    .close = rdma_umap_close,
+   /* Hands out zero pages once the device has been disassociated. */
+   .fault = rdma_umap_fault,
 };

 static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
@@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
    struct ib_uverbs_file *ufile = ucontext->ufile;
    struct rdma_umap_priv *priv;

+   if (!(vma->vm_flags & VM_SHARED))
+       return ERR_PTR(-EINVAL);
+
    if (vma->vm_end - vma->vm_start != size)
        return ERR_PTR(-EINVAL);

@@ -992,7 +1039,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
         * at a time to get the lock ordering right. Typically there
         * will only be one mm, so no big deal.
         */
-       down_write(&mm->mmap_sem);
+       down_read(&mm->mmap_sem);
        mutex_lock(&ufile->umap_lock);
        list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
                      list) {
@@ -1004,10 +1051,9 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)

            zap_vma_ptes(vma, vma->vm_start,
                     vma->vm_end - vma->vm_start);
-           vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
        }
        mutex_unlock(&ufile->umap_lock);
-       up_write(&mm->mmap_sem);
+       up_read(&mm->mmap_sem);
        mmput(mm);
    }
 }

Leave a Reply

Your email address will not be published. Required fields are marked *