Revert "x86/fault: BUG() when uaccess helpers fault on kernel addresses" [Linux 5.0]

Revert "x86/fault: BUG() when uaccess helpers fault on kernel addresses" [Linux 5.0]

This Linux kernel change "Revert “x86/fault: BUG() when uaccess helpers fault on kernel addresses”" is included in the Linux 5.0 release. This change is authored by Linus Torvalds <torvalds [at] linux-foundation.org> on Mon Feb 25 09:10:51 2019 -0800. The commit for this change in Linux stable tree is 53a41cb (patch).

Revert "x86/fault: BUG() when uaccess helpers fault on kernel addresses"

This reverts commit 9da3f2b74054406f87dff7101a569217ffceb29b.

It was well-intentioned, but wrong.  Overriding the exception tables for
instructions for random reasons is just wrong, and that is what the new
code did.

It caused problems for tracing, and it caused problems for strncpy_from_user(),
because the new checks made perfectly valid use cases break, rather than
catch things that did bad things.

Unchecked user space accesses are a problem, but that's not a reason to
add invalid checks that then people have to work around with silly flags
(in this case, that 'kernel_uaccess_faults_ok' flag, which is just an
odd way to say "this commit was wrong" and was sprinked into random
places to hide the wrongness).

The real fix to unchecked user space accesses is to get rid of the
special "let's not check __get_user() and __put_user() at all" logic.
Make __{get|put}_user() be just aliases to the regular {get|put}_user()
functions, and make it impossible to access user space without having
the proper checks in places.

The raison d'ĂȘtre of the special double-underscore versions used to be
that the range check was expensive, and if you did multiple user
accesses, you'd do the range check up front (like the signal frame
handling code, for example).  But SMAP (on x86) and PAN (on ARM) have
made that optimization pointless, because the _real_ expense is the "set
CPU flag to allow user space access".

Do let's not break the valid cases to catch invalid cases that shouldn't
even exist.

Cc: Thomas Gleixner <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Tobin C. Harding <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Jann Horn <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>

There are 72 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 arch/x86/mm/extable.c | 58 ---------------------------------------------------
 fs/namespace.c        |  2 --
 include/linux/sched.h |  6 ------
 mm/maccess.c          |  6 ------
 4 files changed, 72 deletions(-)

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 6521134..856fa40 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -117,67 +117,11 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_fprestore);

-/* Helper to check whether a uaccess fault indicates a kernel bug. */
-static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
-             unsigned long fault_addr)
-{
-   /* This is the normal case: #PF with a fault address in userspace. */
-   if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
-       return false;
-
-   /*
-    * This code can be reached for machine checks, but only if the #MC
-    * handler has already decided that it looks like a candidate for fixup.
-    * This e.g. happens when attempting to access userspace memory which
-    * the CPU can't access because of uncorrectable bad memory.
-    */
-   if (trapnr == X86_TRAP_MC)
-       return false;
-
-   /*
-    * There are two remaining exception types we might encounter here:
-    *  - #PF for faulting accesses to kernel addresses
-    *  - #GP for faulting accesses to noncanonical addresses
-    * Complain about anything else.
-    */
-   if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
-       WARN(1, "unexpected trap %d in uaccess\n", trapnr);
-       return false;
-   }
-
-   /*
-    * This is a faulting memory access in kernel space, on a kernel
-    * address, in a usercopy function. This can e.g. be caused by improper
-    * use of helpers like __put_user and by improper attempts to access
-    * userspace addresses in KERNEL_DS regions.
-    * The one (semi-)legitimate exception are probe_kernel_{read,write}(),
-    * which can be invoked from places like kgdb, /dev/mem (for reading)
-    * and privileged BPF code (for reading).
-    * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
-    * to tell us that faulting on kernel addresses, and even noncanonical
-    * addresses, in a userspace accessor does not necessarily imply a
-    * kernel bug, root might just be doing weird stuff.
-    */
-   if (current->kernel_uaccess_faults_ok)
-       return false;
-
-   /* This is bad. Refuse the fixup so that we go into die(). */
-   if (trapnr == X86_TRAP_PF) {
-       pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
-            fault_addr);
-   } else {
-       pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
-   }
-   return true;
-}
-
 __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
                  struct pt_regs *regs, int trapnr,
                  unsigned long error_code,
                  unsigned long fault_addr)
 {
-   if (bogus_uaccess(regs, trapnr, fault_addr))
-       return false;
    regs->ip = ex_fixup_addr(fixup);
    return true;
 }
@@ -188,8 +132,6 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
                  unsigned long error_code,
                  unsigned long fault_addr)
 {
-   if (bogus_uaccess(regs, trapnr, fault_addr))
-       return false;
    /* Special hack for uaccess_err */
    current->thread.uaccess_err = 1;
    regs->ip = ex_fixup_addr(fixup);
diff --git a/fs/namespace.c b/fs/namespace.c
index a677b59..678ef17 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2698,7 +2698,6 @@ static long exact_copy_from_user(void *to, const void __user * from,
    if (!access_ok(from, n))
        return n;

-   current->kernel_uaccess_faults_ok++;
    while (n) {
        if (__get_user(c, f)) {
            memset(t, 0, n);
@@ -2708,7 +2707,6 @@ static long exact_copy_from_user(void *to, const void __user * from,
        f++;
        n--;
    }
-   current->kernel_uaccess_faults_ok--;
    return n;
 }

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bba3afb..f9b43c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -739,12 +739,6 @@ struct task_struct {
    unsigned            use_memdelay:1;
 #endif

-   /*
-    * May usercopy functions fault on kernel addresses?
-    * This is not just a single bit because this can potentially nest.
-    */
-   unsigned int            kernel_uaccess_faults_ok;
-
    unsigned long           atomic_flags; /* Flags requiring atomic access. */

    struct restart_block        restart_block;
diff --git a/mm/maccess.c b/mm/maccess.c
index f341663..ec00be5 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -30,10 +30,8 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)

    set_fs(KERNEL_DS);
    pagefault_disable();
-   current->kernel_uaccess_faults_ok++;
    ret = __copy_from_user_inatomic(dst,
            (__force const void __user *)src, size);
-   current->kernel_uaccess_faults_ok--;
    pagefault_enable();
    set_fs(old_fs);

@@ -60,9 +58,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)

    set_fs(KERNEL_DS);
    pagefault_disable();
-   current->kernel_uaccess_faults_ok++;
    ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
-   current->kernel_uaccess_faults_ok--;
    pagefault_enable();
    set_fs(old_fs);

@@ -98,13 +94,11 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)

    set_fs(KERNEL_DS);
    pagefault_disable();
-   current->kernel_uaccess_faults_ok++;

    do {
        ret = __get_user(*dst++, (const char __user __force *)src++);
    } while (dst[-1] && ret == 0 && src - unsafe_addr < count);

-   current->kernel_uaccess_faults_ok--;
    dst[-1] = '\0';
    pagefault_enable();
    set_fs(old_fs);

Leave a Reply

Your email address will not be published. Required fields are marked *