drm/i915/selftests: Refactor common live_test framework

This change “drm/i915/selftests: Refactor common live_test framework” in Linux kernel is authored by Chris Wilson <chris [at] chris-wilson.co.uk> on Mon Jan 21 22:20:47 2019 +0000.

drm/i915/selftests: Refactor common live_test framework

Before adding yet another copy of struct live_test and its handler,
refactor the existing code into a common framework for live selftests.
For many live selftests, we want to know if the GPU hung or otherwise
misbehaved during the execution of the test (beyond any infraction in
the behaviour under test), live_test provides this by comparing the
GPU state before and after, alerting if it unexpectedly changed (e.g.
the reset counter changed). It also ensures that the GPU is idle before
and after the test, so that residual code running on the GPU is flushed
before testing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190121222117.23305-5-chris@chris-wilson.co.uk

This Linux change may have been applied to various maintained Linux releases and you can find Linux releases including commit e4a8c81.

There are 310 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 drivers/gpu/drm/i915/Makefile                     |   1 +
 drivers/gpu/drm/i915/selftests/i915_gem_context.c | 103 +++-------------------
 drivers/gpu/drm/i915/selftests/i915_request.c     |  86 +++---------------
 drivers/gpu/drm/i915/selftests/igt_live_test.c    |  85 ++++++++++++++++++
 drivers/gpu/drm/i915/selftests/igt_live_test.h    |  35 ++++++++
 5 files changed, 147 insertions(+), 163 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_live_test.c
 create mode 100644 drivers/gpu/drm/i915/selftests/igt_live_test.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 611115e..f050759 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -167,6 +167,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += 
 	selftests/i915_random.o 
 	selftests/i915_selftest.o 
 	selftests/igt_flush_test.o 
+	selftests/igt_live_test.o 
 	selftests/igt_reset.o 
 	selftests/igt_spinner.o
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 4cba506..e2c1f0b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -27,6 +27,7 @@
 #include "../i915_selftest.h"
 #include "i915_random.h"
 #include "igt_flush_test.h"
+#include "igt_live_test.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
@@ -34,84 +35,6 @@
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
 
-struct live_test {
-	struct drm_i915_private *i915;
-	const char *func;
-	const char *name;
-
-	unsigned int reset_global;
-	unsigned int reset_engine[I915_NUM_ENGINES];
-};
-
-static int begin_live_test(struct live_test *t,
-			   struct drm_i915_private *i915,
-			   const char *func,
-			   const char *name)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err;
-
-	t->i915 = i915;
-	t->func = func;
-	t->name = name;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err) {
-		pr_err("%s(%s): failed to idle before, with err=%d!",
-		       func, name, err);
-		return err;
-	}
-
-	i915->gpu_error.missed_irq_rings = 0;
-	t->reset_global = i915_reset_count(&i915->gpu_error);
-
-	for_each_engine(engine, i915, id)
-		t->reset_engine[id] =
-			i915_reset_engine_count(&i915->gpu_error, engine);
-
-	return 0;
-}
-
-static int end_live_test(struct live_test *t)
-{
-	struct drm_i915_private *i915 = t->i915;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		return -EIO;
-
-	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
-		pr_err("%s(%s): GPU was reset %d times!n",
-		       t->func, t->name,
-		       i915_reset_count(&i915->gpu_error) - t->reset_global);
-		return -EIO;
-	}
-
-	for_each_engine(engine, i915, id) {
-		if (t->reset_engine[id] ==
-		    i915_reset_engine_count(&i915->gpu_error, engine))
-			continue;
-
-		pr_err("%s(%s): engine '%s' was reset %d times!n",
-		       t->func, t->name, engine->name,
-		       i915_reset_engine_count(&i915->gpu_error, engine) -
-		       t->reset_engine[id]);
-		return -EIO;
-	}
-
-	if (i915->gpu_error.missed_irq_rings) {
-		pr_err("%s(%s): Missed interrupts on engines %lxn",
-		       t->func, t->name, i915->gpu_error.missed_irq_rings);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static int live_nop_switch(void *arg)
 {
 	const unsigned int nctx = 1024;
@@ -120,8 +43,8 @@ static int live_nop_switch(void *arg)
 	struct i915_gem_context **ctx;
 	enum intel_engine_id id;
 	intel_wakeref_t wakeref;
+	struct igt_live_test t;
 	struct drm_file *file;
-	struct live_test t;
 	unsigned long n;
 	int err = -ENODEV;
 
@@ -185,7 +108,7 @@ static int live_nop_switch(void *arg)
 		pr_info("Populated %d contexts on %s in %llunsn",
 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_unlock;
 
@@ -233,7 +156,7 @@ static int live_nop_switch(void *arg)
 				break;
 		}
 
-		err = end_live_test(&t);
+		err = igt_live_test_end(&t);
 		if (err)
 			goto out_unlock;
 
@@ -554,10 +477,10 @@ static int igt_ctx_exec(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj = NULL;
 	unsigned long ncontexts, ndwords, dw;
+	struct igt_live_test t;
 	struct drm_file *file;
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
-	struct live_test t;
 	int err = -ENODEV;
 
 	/*
@@ -575,7 +498,7 @@ static int igt_ctx_exec(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -645,7 +568,7 @@ static int igt_ctx_exec(void *arg)
 	}
 
 out_unlock:
-	if (end_live_test(&t))
+	if (igt_live_test_end(&t))
 		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -660,11 +583,11 @@ static int igt_ctx_readonly(void *arg)
 	struct i915_gem_context *ctx;
 	struct i915_hw_ppgtt *ppgtt;
 	unsigned long ndwords, dw;
+	struct igt_live_test t;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
 	IGT_TIMEOUT(end_time);
 	LIST_HEAD(objects);
-	struct live_test t;
 	int err = -ENODEV;
 
 	/*
@@ -679,7 +602,7 @@ static int igt_ctx_readonly(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -757,7 +680,7 @@ static int igt_ctx_readonly(void *arg)
 	}
 
 out_unlock:
-	if (end_live_test(&t))
+	if (igt_live_test_end(&t))
 		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -982,10 +905,10 @@ static int igt_vm_isolation(void *arg)
 	struct i915_gem_context *ctx_a, *ctx_b;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
+	struct igt_live_test t;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
 	unsigned long count;
-	struct live_test t;
 	unsigned int id;
 	u64 vm_total;
 	int err;
@@ -1004,7 +927,7 @@ static int igt_vm_isolation(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -1075,7 +998,7 @@ static int igt_vm_isolation(void *arg)
 out_rpm:
 	intel_runtime_pm_put(i915, wakeref);
 out_unlock:
-	if (end_live_test(&t))
+	if (igt_live_test_end(&t))
 		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 2e14d6d..4d4b86b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -25,6 +25,7 @@
 #include <linux/prime_numbers.h>
 
 #include "../i915_selftest.h"
+#include "igt_live_test.h"
 
 #include "mock_context.h"
 #include "mock_gem_device.h"
@@ -270,73 +271,12 @@ int i915_request_mock_selftests(void)
 	return err;
 }
 
-struct live_test {
-	struct drm_i915_private *i915;
-	const char *func;
-	const char *name;
-
-	unsigned int reset_count;
-};
-
-static int begin_live_test(struct live_test *t,
-			   struct drm_i915_private *i915,
-			   const char *func,
-			   const char *name)
-{
-	int err;
-
-	t->i915 = i915;
-	t->func = func;
-	t->name = name;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err) {
-		pr_err("%s(%s): failed to idle before, with err=%d!",
-		       func, name, err);
-		return err;
-	}
-
-	i915->gpu_error.missed_irq_rings = 0;
-	t->reset_count = i915_reset_count(&i915->gpu_error);
-
-	return 0;
-}
-
-static int end_live_test(struct live_test *t)
-{
-	struct drm_i915_private *i915 = t->i915;
-
-	i915_retire_requests(i915);
-
-	if (wait_for(intel_engines_are_idle(i915), 10)) {
-		pr_err("%s(%s): GPU not idlen", t->func, t->name);
-		return -EIO;
-	}
-
-	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
-		pr_err("%s(%s): GPU was reset %d times!n",
-		       t->func, t->name,
-		       i915_reset_count(&i915->gpu_error) - t->reset_count);
-		return -EIO;
-	}
-
-	if (i915->gpu_error.missed_irq_rings) {
-		pr_err("%s(%s): Missed interrupts on engines %lxn",
-		       t->func, t->name, i915->gpu_error.missed_irq_rings);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static int live_nop_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	int err = -ENODEV;
 
@@ -354,7 +294,7 @@ static int live_nop_request(void *arg)
 		IGT_TIMEOUT(end_time);
 		ktime_t times[2] = {};
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_unlock;
 
@@ -396,7 +336,7 @@ static int live_nop_request(void *arg)
 				break;
 		}
 
-		err = end_live_test(&t);
+		err = igt_live_test_end(&t);
 		if (err)
 			goto out_unlock;
 
@@ -483,8 +423,8 @@ static int live_empty_request(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
+	struct igt_live_test t;
 	struct i915_vma *batch;
-	struct live_test t;
 	unsigned int id;
 	int err = 0;
 
@@ -508,7 +448,7 @@ static int live_empty_request(void *arg)
 		unsigned long n, prime;
 		ktime_t times[2] = {};
 
-		err = begin_live_test(&t, i915, __func__, engine->name);
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
 		if (err)
 			goto out_batch;
 
@@ -544,7 +484,7 @@ static int live_empty_request(void *arg)
 				break;
 		}
 
-		err = end_live_test(&t);
+		err = igt_live_test_end(&t);
 		if (err)
 			goto out_batch;
 
@@ -643,8 +583,8 @@ static int live_all_engines(void *arg)
 	struct intel_engine_cs *engine;
 	struct i915_request *request[I915_NUM_ENGINES];
 	intel_wakeref_t wakeref;
+	struct igt_live_test t;
 	struct i915_vma *batch;
-	struct live_test t;
 	unsigned int id;
 	int err;
 
@@ -656,7 +596,7 @@ static int live_all_engines(void *arg)
 	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(i915);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -728,7 +668,7 @@ static int live_all_engines(void *arg)
 		request[id] = NULL;
 	}
 
-	err = end_live_test(&t);
+	err = igt_live_test_end(&t);
 
 out_request:
 	for_each_engine(engine, i915, id)
@@ -749,7 +689,7 @@ static int live_sequential_engines(void *arg)
 	struct i915_request *prev = NULL;
 	struct intel_engine_cs *engine;
 	intel_wakeref_t wakeref;
-	struct live_test t;
+	struct igt_live_test t;
 	unsigned int id;
 	int err;
 
@@ -762,7 +702,7 @@ static int live_sequential_engines(void *arg)
 	mutex_lock(&i915->drm.struct_mutex);
 	wakeref = intel_runtime_pm_get(i915);
 
-	err = begin_live_test(&t, i915, __func__, "");
+	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
@@ -845,7 +785,7 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(!i915_request_completed(request[id]));
 	}
 
-	err = end_live_test(&t);
+	err = igt_live_test_end(&t);
 
 out_request:
 	for_each_engine(engine, i915, id) {
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c
new file mode 100644
index 00000000..5deb485
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c
@@ -0,0 +1,85 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_drv.h"
+
+#include "../i915_selftest.h"
+#include "igt_flush_test.h"
+#include "igt_live_test.h"
+
+int igt_live_test_begin(struct igt_live_test *t,
+			struct drm_i915_private *i915,
+			const char *func,
+			const char *name)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	t->i915 = i915;
+	t->func = func;
+	t->name = name;
+
+	err = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED,
+				     MAX_SCHEDULE_TIMEOUT);
+	if (err) {
+		pr_err("%s(%s): failed to idle before, with err=%d!",
+		       func, name, err);
+		return err;
+	}
+
+	i915->gpu_error.missed_irq_rings = 0;
+	t->reset_global = i915_reset_count(&i915->gpu_error);
+
+	for_each_engine(engine, i915, id)
+		t->reset_engine[id] =
+			i915_reset_engine_count(&i915->gpu_error, engine);
+
+	return 0;
+}
+
+int igt_live_test_end(struct igt_live_test *t)
+{
+	struct drm_i915_private *i915 = t->i915;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		return -EIO;
+
+	if (t->reset_global != i915_reset_count(&i915->gpu_error)) {
+		pr_err("%s(%s): GPU was reset %d times!n",
+		       t->func, t->name,
+		       i915_reset_count(&i915->gpu_error) - t->reset_global);
+		return -EIO;
+	}
+
+	for_each_engine(engine, i915, id) {
+		if (t->reset_engine[id] ==
+		    i915_reset_engine_count(&i915->gpu_error, engine))
+			continue;
+
+		pr_err("%s(%s): engine '%s' was reset %d times!n",
+		       t->func, t->name, engine->name,
+		       i915_reset_engine_count(&i915->gpu_error, engine) -
+		       t->reset_engine[id]);
+		return -EIO;
+	}
+
+	if (i915->gpu_error.missed_irq_rings) {
+		pr_err("%s(%s): Missed interrupts on engines %lxn",
+		       t->func, t->name, i915->gpu_error.missed_irq_rings);
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.h b/drivers/gpu/drm/i915/selftests/igt_live_test.h
new file mode 100644
index 00000000..c0e9f99
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_live_test.h
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef IGT_LIVE_TEST_H
+#define IGT_LIVE_TEST_H
+
+#include "../i915_gem.h"
+
+struct drm_i915_private;
+
+struct igt_live_test {
+	struct drm_i915_private *i915;
+	const char *func;
+	const char *name;
+
+	unsigned int reset_global;
+	unsigned int reset_engine[I915_NUM_ENGINES];
+};
+
+/*
+ * Flush the GPU state before and after the test to ensure that no residual
+ * code is running on the GPU that may affect this test. Also compare the
+ * state before and after the test and alert if it unexpectedly changes,
+ * e.g. if the GPU was reset.
+ */
+int igt_live_test_begin(struct igt_live_test *t,
+			struct drm_i915_private *i915,
+			const char *func,
+			const char *name);
+int igt_live_test_end(struct igt_live_test *t);
+
+#endif /* IGT_LIVE_TEST_H */

The commit for this change in Linux stable tree is e4a8c81 (patch).

Leave a Reply

Your email address will not be published. Required fields are marked *