perf: Make perf_event_output() propagate the output() return

This change “perf: Make perf_event_output() propagate the output() return” in Linux kernel is authored by Arnaldo Carvalho de Melo <acme [at] redhat.com> on Fri Jan 11 13:20:20 2019 -0300.

perf: Make perf_event_output() propagate the output() return

For the original mode of operation it isn't needed, since we report back
errors via PERF_RECORD_LOST records in the ring buffer, but for use in
bpf_perf_event_output() it is convenient to return the errors, basically
-ENOSPC.

Currently bpf_perf_event_output() returns an error indication, the last
thing it does, which is to push it to the ring buffer is that can fail
and if so, this failure won't be reported back to its users, fix it.

Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/r/20190118150938.GN5823@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

This Linux change may have been applied to various maintained Linux releases and you can find Linux releases including commit 5620196.

There are 48 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 include/linux/perf_event.h                       |  6 +++---
 kernel/events/core.c                             | 11 +++++++----
 kernel/trace/bpf_trace.c                         |  3 +--
 tools/perf/examples/bpf/augmented_raw_syscalls.c |  4 ++--
 tools/perf/examples/bpf/augmented_syscalls.c     | 14 +++++++-------
 tools/perf/examples/bpf/etcsnoop.c               | 10 +++++-----
 6 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f8ec361..4eb8806 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event,
 extern void perf_event_output_backward(struct perf_event *event,
 				       struct perf_sample_data *data,
 				       struct pt_regs *regs);
-extern void perf_event_output(struct perf_event *event,
-			      struct perf_sample_data *data,
-			      struct pt_regs *regs);
+extern int perf_event_output(struct perf_event *event,
+			     struct perf_sample_data *data,
+			     struct pt_regs *regs);
 
 static inline bool
 is_default_overflow_handler(struct perf_event *event)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fbe59b7..bc525cd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 		data->phys_addr = perf_virt_to_phys(data->addr);
 }
 
-static __always_inline void
+static __always_inline int
 __perf_event_output(struct perf_event *event,
 		    struct perf_sample_data *data,
 		    struct pt_regs *regs,
@@ -6499,13 +6499,15 @@ void perf_prepare_sample(struct perf_event_header *header,
 {
 	struct perf_output_handle handle;
 	struct perf_event_header header;
+	int err;
 
 	/* protect the callchain buffers */
 	rcu_read_lock();
 
 	perf_prepare_sample(&header, data, event, regs);
 
-	if (output_begin(&handle, event, header.size))
+	err = output_begin(&handle, event, header.size);
+	if (err)
 		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
@@ -6514,6 +6516,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 exit:
 	rcu_read_unlock();
+	return err;
 }
 
 void
@@ -6532,12 +6535,12 @@ void perf_prepare_sample(struct perf_event_header *header,
 	__perf_event_output(event, data, regs, perf_output_begin_backward);
 }
 
-void
+int
 perf_event_output(struct perf_event *event,
 		  struct perf_sample_data *data,
 		  struct pt_regs *regs)
 {
-	__perf_event_output(event, data, regs, perf_output_begin);
+	return __perf_event_output(event, data, regs, perf_output_begin);
 }
 
 /*
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8b068ad..088c2032 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -431,8 +431,7 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
-	perf_event_output(event, sd, regs);
-	return 0;
+	return perf_event_output(event, sd, regs);
 }
 
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 53c2333..9e9d4c6 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args)
 		len = sizeof(augmented_args.args);
 	}
 
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
-	return 0;
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
 }
 
 SEC("raw_syscalls:sys_exit")
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
index 2ae4481..b7dba114 100644
--- a/tools/perf/examples/bpf/augmented_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				
 		len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;	
 		len &= sizeof(augmented_args.filename.value) - 1;				
 	}											
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			
-			  &augmented_args, len);						
-	return 0;										
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		
+				 &augmented_args, len);						
 }												
 int syscall_exit(syscall)(struct syscall_exit_args *args)					
 {												
@@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				
 /*		addrlen = augmented_args.args.addrlen;				     */		
 /*										     */		
 	probe_read(&augmented_args.addr, addrlen, args->addr_ptr); 				
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			
-			  &augmented_args, 							
-			  sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);	
-	return 0;										
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		
+				 &augmented_args, 						
+				sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);
 }												
 int syscall_exit(syscall)(struct syscall_exit_args *args)					
 {												
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
index b59e881..550e69c 100644
--- a/tools/perf/examples/bpf/etcsnoop.c
+++ b/tools/perf/examples/bpf/etcsnoop.c
@@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				
 						      args->filename_ptr); 			
 	if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0)			
 		return 0;									
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			
-			  &augmented_args, 							
-			  (sizeof(augmented_args) - sizeof(augmented_args.filename.value) +	
-			   augmented_args.filename.size));					
-	return 0;										
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		
+				 &augmented_args,						
+				 (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + 
+				 augmented_args.filename.size));				
 }
 
 struct syscall_enter_openat_args {

The commit for this change in Linux stable tree is 5620196 (patch).

Leave a Reply

Your email address will not be published. Required fields are marked *