From e20044f7e9ae2b5395ca3ae9bd0907fdf43357a7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 Mar 2021 12:41:01 -0400 Subject: ring-buffer: Separate out internal use of ring_buffer_event_time_stamp() The exported use of ring_buffer_event_time_stamp() is going to become different than how it is used internally. Move the internal logic out into a static function called rb_event_time_stamp(), and have the internal callers call that instead. Link: https://lkml.kernel.org/r/20210316164113.257790481@goodmis.org Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 68744c51517e..941ac2021b97 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -287,6 +287,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); #define TS_MASK ((1ULL << TS_SHIFT) - 1) #define TS_DELTA_TEST (~TS_MASK) +static u64 rb_event_time_stamp(struct ring_buffer_event *event) +{ + u64 ts; + + ts = event->array[0]; + ts <<= TS_SHIFT; + ts += event->time_delta; + + return ts; +} + /** * ring_buffer_event_time_stamp - return the event's extended timestamp * @event: the event to get the timestamp of @@ -299,13 +310,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); */ u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) { - u64 ts; - - ts = event->array[0]; - ts <<= TS_SHIFT; - ts += event->time_delta; - - return ts; + return rb_event_time_stamp(event); } /* Flag when events were overwritten */ @@ -2766,7 +2771,7 @@ static u64 rb_time_delta(struct ring_buffer_event *event) return 0; case RINGBUF_TYPE_TIME_EXTEND: - return ring_buffer_event_time_stamp(event); + return rb_event_time_stamp(event); case RINGBUF_TYPE_TIME_STAMP: return 0; @@ -3212,13 +3217,13 @@ static void dump_buffer_page(struct buffer_data_page *bpage, switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts += delta; pr_warn(" [%lld] delta:%lld TIME EXTEND\n", ts, delta); break; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts = delta; pr_warn(" [%lld] absolute:%lld TIME STAMP\n", ts, delta); break; @@ -3289,12 +3294,12 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, switch (event->type_len) { case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts += delta; break; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); ts = delta; break; @@ -4256,12 +4261,12 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, return; case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); cpu_buffer->read_stamp += delta; return; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); cpu_buffer->read_stamp = delta; return; @@ -4286,12 +4291,12 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, return; case RINGBUF_TYPE_TIME_EXTEND: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); iter->read_stamp += delta; return; case RINGBUF_TYPE_TIME_STAMP: - delta = ring_buffer_event_time_stamp(event); + delta = rb_event_time_stamp(event); iter->read_stamp = delta; return; @@ -4544,7 +4549,7 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, case RINGBUF_TYPE_TIME_STAMP: if (ts) { - *ts = ring_buffer_event_time_stamp(event); + *ts = rb_event_time_stamp(event); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } @@ -4635,7 +4640,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) case RINGBUF_TYPE_TIME_STAMP: if (ts) { - *ts = ring_buffer_event_time_stamp(event); + *ts = rb_event_time_stamp(event); ring_buffer_normalize_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu, ts); } -- cgit v1.2.3 From 8672e4948d0c44272cc05f8ff563dbf6b6c1289f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 Mar 2021 12:41:02 -0400 Subject: ring-buffer: Add a event_stamp to cpu_buffer for each level of nesting Add a place to save the current event time stamp for each level of nesting. This will be used to retrieve the time stamp of the current event before it is committed. Link: https://lkml.kernel.org/r/20210316164113.399089673@goodmis.org Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 941ac2021b97..470d97169081 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -492,6 +492,8 @@ struct rb_time_struct { #endif typedef struct rb_time_struct rb_time_t; +#define MAX_NEST 5 + /* * head_page == tail_page && head == tail then buffer is empty. */ @@ -529,6 +531,7 @@ struct ring_buffer_per_cpu { unsigned long read_bytes; rb_time_t write_stamp; rb_time_t before_stamp; + u64 event_stamp[MAX_NEST]; u64 read_stamp; /* ring buffer pages to update, > 0 to add, < 0 to remove */ long nr_pages_to_update; @@ -2715,6 +2718,10 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, { unsigned length = info->length; u64 delta = info->delta; + unsigned int nest = local_read(&cpu_buffer->committing) - 1; + + if (nest < MAX_NEST) + cpu_buffer->event_stamp[nest] = info->ts; /* * If we need to add a timestamp, then we @@ -3456,7 +3463,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, info->after, ts)) { /* Nothing came after this event between C and E */ info->delta = ts - info->after; - info->ts = ts; } else { /* * Interrupted between C and E: @@ -3468,6 +3474,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, */ info->delta = 0; } + info->ts = ts; info->add_timestamp &= ~RB_ADD_STAMP_FORCE; } @@ -5026,6 +5033,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) rb_time_set(&cpu_buffer->write_stamp, 0); rb_time_set(&cpu_buffer->before_stamp, 0); + memset(cpu_buffer->event_stamp, 0, sizeof(cpu_buffer->event_stamp)); + cpu_buffer->lost_events = 0; cpu_buffer->last_overrun = 0; -- cgit v1.2.3 From efe6196a6bc5bbc84b856316c4687fd24566a95c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 Mar 2021 12:41:04 -0400 Subject: ring-buffer: Allow ring_buffer_event_time_stamp() to return time stamp of all events Currently, ring_buffer_event_time_stamp() only returns an accurate time stamp of the event if it has an absolute extended time stamp attached to it. To make it more robust, use the event_stamp() in case the event does not have an absolute value attached to it. This will allow ring_buffer_event_time_stamp() to be used in more cases than just histograms, and it will also allow histograms to not require including absolute values all the time. Link: https://lkml.kernel.org/r/20210316164113.704830885@goodmis.org Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 60 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 15 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 470d97169081..8fa2a84f714f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -298,21 +298,6 @@ static u64 rb_event_time_stamp(struct ring_buffer_event *event) return ts; } -/** - * ring_buffer_event_time_stamp - return the event's extended timestamp - * @event: the event to get the timestamp of - * - * Returns the extended timestamp associated with a data event. - * An extended time_stamp is a 64-bit timestamp represented - * internally in a special way that makes the best use of space - * contained within a ring buffer event. This function decodes - * it and maps it to a straight u64 value. - */ -u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) -{ - return rb_event_time_stamp(event); -} - /* Flag when events were overwritten */ #define RB_MISSED_EVENTS (1 << 31) /* Missed count stored at end */ @@ -757,6 +742,51 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) } #endif +static inline u64 rb_time_stamp(struct trace_buffer *buffer); + +/** + * ring_buffer_event_time_stamp - return the event's current time stamp + * @buffer: The buffer that the event is on + * @event: the event to get the time stamp of + * + * Note, this must be called after @event is reserved, and before it is + * committed to the ring buffer. And must be called from the same + * context where the event was reserved (normal, softirq, irq, etc). + * + * Returns the time stamp associated with the current event. + * If the event has an extended time stamp, then that is used as + * the time stamp to return. + * In the highly unlikely case that the event was nested more than + * the max nesting, then the write_stamp of the buffer is returned, + * otherwise current time is returned, but that really neither of + * the last two cases should ever happen. + */ +u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, + struct ring_buffer_event *event) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[smp_processor_id()]; + unsigned int nest; + u64 ts; + + /* If the event includes an absolute time, then just use that */ + if (event->type_len == RINGBUF_TYPE_TIME_STAMP) + return rb_event_time_stamp(event); + + /* Read the current saved nesting level time stamp */ + nest = local_read(&cpu_buffer->committing) - 1; + if (likely(nest < MAX_NEST)) + return cpu_buffer->event_stamp[nest]; + + WARN_ON_ONCE(1); + + /* Can only fail on 32 bit */ + if (!rb_time_read(&cpu_buffer->write_stamp, &ts)) + /* Screw it, just read the current time */ + ts = rb_time_stamp(cpu_buffer->buffer); + + return ts; +} + /** * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from -- cgit v1.2.3 From a948c69d6fb1ba749a958a8a87d4eecdda28989d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 16 Mar 2021 12:41:06 -0400 Subject: ring-buffer: Add verifier for using ring_buffer_event_time_stamp() The ring_buffer_event_time_stamp() must be only called by an event that has not been committed yet, and is on the buffer that is passed in. This was used to help debug converting the histogram logic over to using the new time stamp code, and was proven to be very useful. Add a verifier that can check that this is the case, and extra WARN_ONs to catch unexpected use cases. Link: https://lkml.kernel.org/r/20210316164113.987294354@goodmis.org Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 56 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8fa2a84f714f..1c61a8cd7b99 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -742,6 +742,48 @@ static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) } #endif +/* + * Enable this to make sure that the event passed to + * ring_buffer_event_time_stamp() is not committed and also + * is on the buffer that it passed in. + */ +//#define RB_VERIFY_EVENT +#ifdef RB_VERIFY_EVENT +static struct list_head *rb_list_head(struct list_head *list); +static void verify_event(struct ring_buffer_per_cpu *cpu_buffer, + void *event) +{ + struct buffer_page *page = cpu_buffer->commit_page; + struct buffer_page *tail_page = READ_ONCE(cpu_buffer->tail_page); + struct list_head *next; + long commit, write; + unsigned long addr = (unsigned long)event; + bool done = false; + int stop = 0; + + /* Make sure the event exists and is not committed yet */ + do { + if (page == tail_page || WARN_ON_ONCE(stop++ > 100)) + done = true; + commit = local_read(&page->page->commit); + write = local_read(&page->write); + if (addr >= (unsigned long)&page->page->data[commit] && + addr < (unsigned long)&page->page->data[write]) + return; + + next = rb_list_head(page->list.next); + page = list_entry(next, struct buffer_page, list); + } while (!done); + WARN_ON_ONCE(1); +} +#else +static inline void verify_event(struct ring_buffer_per_cpu *cpu_buffer, + void *event) +{ +} +#endif + + static inline u64 rb_time_stamp(struct trace_buffer *buffer); /** @@ -772,13 +814,19 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, if (event->type_len == RINGBUF_TYPE_TIME_STAMP) return rb_event_time_stamp(event); + nest = local_read(&cpu_buffer->committing); + verify_event(cpu_buffer, event); + if (WARN_ON_ONCE(!nest)) + goto fail; + /* Read the current saved nesting level time stamp */ - nest = local_read(&cpu_buffer->committing) - 1; - if (likely(nest < MAX_NEST)) + if (likely(--nest < MAX_NEST)) return cpu_buffer->event_stamp[nest]; - WARN_ON_ONCE(1); + /* Shouldn't happen, warn if it does */ + WARN_ONCE(1, "nest (%d) greater than max", nest); + fail: /* Can only fail on 32 bit */ if (!rb_time_read(&cpu_buffer->write_stamp, &ts)) /* Screw it, just read the current time */ @@ -2750,7 +2798,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, u64 delta = info->delta; unsigned int nest = local_read(&cpu_buffer->committing) - 1; - if (nest < MAX_NEST) + if (!WARN_ON_ONCE(nest >= MAX_NEST)) cpu_buffer->event_stamp[nest] = info->ts; /* -- cgit v1.2.3 From f2cc020d7876de7583feb52ec939a32419cf9468 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Mar 2021 18:49:35 +0100 Subject: tracing: Fix various typos in comments Fix ~59 single-word typos in the tracing code comments, and fix the grammar in a handful of places. Link: https://lore.kernel.org/r/20210322224546.GA1981273@gmail.com Link: https://lkml.kernel.org/r/20210323174935.GA4176821@gmail.com Reviewed-by: Randy Dunlap Signed-off-by: Ingo Molnar Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1c61a8cd7b99..f4216df58e31 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3154,7 +3154,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * is called before preempt_count() is updated, since the check will * be on the NORMAL bit, the TRANSITION bit will then be set. If an * NMI then comes in, it will set the NMI bit, but when the NMI code - * does the trace_recursive_unlock() it will clear the TRANSTION bit + * does the trace_recursive_unlock() it will clear the TRANSITION bit * and leave the NMI bit set. But this is fine, because the interrupt * code that set the TRANSITION bit will then clear the NMI bit when it * calls trace_recursive_unlock(). If another NMI comes in, it will -- cgit v1.2.3 From f3ef7202ef7c705d640d1aeec3b286a641ac9186 Mon Sep 17 00:00:00 2001 From: "Yordan Karadzhov (VMware)" Date: Mon, 29 Mar 2021 16:03:31 +0300 Subject: tracing: Remove unused argument from "ring_buffer_time_stamp() The "cpu" parameter is not being used by the function. Link: https://lkml.kernel.org/r/20210329130331.199402-1-y.karadz@gmail.com Signed-off-by: Yordan Karadzhov (VMware) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ring_buffer.c') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f4216df58e31..2c0ee6484990 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1080,7 +1080,7 @@ static inline u64 rb_time_stamp(struct trace_buffer *buffer) return ts << DEBUG_SHIFT; } -u64 ring_buffer_time_stamp(struct trace_buffer *buffer, int cpu) +u64 ring_buffer_time_stamp(struct trace_buffer *buffer) { u64 time; -- cgit v1.2.3