bcache: fix stack corruption by PRECEDING_KEY() [Linux 4.14.128]

bcache: fix stack corruption by PRECEDING_KEY() [Linux 4.14.128]

This Linux kernel change "bcache: fix stack corruption by PRECEDING_KEY()" is included in the Linux 4.14.128 release. This change is authored by Coly Li <colyli [at] suse.de> on Mon Jun 10 06:13:34 2019 +0800. The commit for this change in Linux stable tree is a75d830 (patch) which is from upstream commit 31b9095. The same Linux upstream change may have been applied to various maintained Linux releases and you can find all Linux releases containing changes from upstream 31b9095.

bcache: fix stack corruption by PRECEDING_KEY()

commit 31b90956b124240aa8c63250243ae1a53585c5e2 upstream.

Recently people report bcache code compiled with gcc9 is broken, one of
the buggy behavior I observe is that two adjacent 4KB I/Os should merge
into one but they don't. Finally it turns out to be a stack corruption
caused by macro PRECEDING_KEY().

See how PRECEDING_KEY() is defined in bset.h,
437 #define PRECEDING_KEY(_k)                                       \
438 ({                                                              \
439         struct bkey *_ret = NULL;                               \
440                                                                 \
441         if (KEY_INODE(_k) || KEY_OFFSET(_k)) {                  \
442                 _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);  \
443                                                                 \
444                 if (!_ret->low)                                 \
445                         _ret->high--;                           \
446                 _ret->low--;                                    \
447         }                                                       \
448                                                                 \
449         _ret;                                                   \
450 })

At line 442, _ret points to address of a on-stack variable combined by
KEY(), the life range of this on-stack variable is in line 442-446,
once _ret is returned to bch_btree_insert_key(), the returned address
points to an invalid stack address and this address is overwritten in
the following called bch_btree_iter_init(). Then argument 'search' of
bch_btree_iter_init() points to some address inside stackframe of
bch_btree_iter_init(), exact address depends on how the compiler
allocates stack space. Now the stack is corrupted.

Fixes: 0eacac22034c ("bcache: PRECEDING_KEY()")
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Rolf Fokkens <rolf@rolffokkens.nl>
Reviewed-by: Pierre JUHEN <pierre.juhen@orange.fr>
Tested-by: Shenghui Wang <shhuiw@foxmail.com>
Tested-by: Pierre JUHEN <pierre.juhen@orange.fr>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Nix <nix@esperi.org.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

There are 50 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 drivers/md/bcache/bset.c | 16 +++++++++++++---
 drivers/md/bcache/bset.h | 34 ++++++++++++++++++++--------------
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e56d3ec..1edf951 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -825,12 +825,22 @@ unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
    struct bset *i = bset_tree_last(b)->data;
    struct bkey *m, *prev = NULL;
    struct btree_iter iter;
+   struct bkey preceding_key_on_stack = ZERO_KEY;
+   struct bkey *preceding_key_p = &preceding_key_on_stack;

    BUG_ON(b->ops->is_extents && !KEY_SIZE(k));

-   m = bch_btree_iter_init(b, &iter, b->ops->is_extents
-               ? PRECEDING_KEY(&START_KEY(k))
-               : PRECEDING_KEY(k));
+   /*
+    * If k has preceding key, preceding_key_p will be set to address
+    *  of k's preceding key; otherwise preceding_key_p will be set
+    * to NULL inside preceding_key().
+    */
+   if (b->ops->is_extents)
+       preceding_key(&START_KEY(k), &preceding_key_p);
+   else
+       preceding_key(k, &preceding_key_p);
+
+   m = bch_btree_iter_init(b, &iter, preceding_key_p);

    if (b->ops->insert_fixup(b, k, &iter, replace_key))
        return status;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index fa506c1..8d1964b 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -418,20 +418,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
    return __bch_cut_back(where, k);
 }

-#define PRECEDING_KEY(_k)                  \
-({                             \
-   struct bkey *_ret = NULL;               \
-                               \
-   if (KEY_INODE(_k) || KEY_OFFSET(_k)) {          \
-       _ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);  \
-                               \
-       if (!_ret->low)                 \
-           _ret->high--;               \
-       _ret->low--;                    \
-   }                           \
-                               \
-   _ret;                           \
-})
+/*
+ * Pointer '*preceding_key_p' points to a memory object to store preceding
+ * key of k. If the preceding key does not exist, set '*preceding_key_p' to
+ * NULL. So the caller of preceding_key() needs to take care of memory
+ * which '*preceding_key_p' pointed to before calling preceding_key().
+ * Currently the only caller of preceding_key() is bch_btree_insert_key(),
+ * and it points to an on-stack variable, so the memory release is handled
+ * by stackframe itself.
+ */
+static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p)
+{
+   if (KEY_INODE(k) || KEY_OFFSET(k)) {
+       (**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
+       if (!(*preceding_key_p)->low)
+           (*preceding_key_p)->high--;
+       (*preceding_key_p)->low--;
+   } else {
+       (*preceding_key_p) = NULL;
+   }
+}

 static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
 {

Leave a Reply

Your email address will not be published. Required fields are marked *