The ring buffer is convenient: it has a page granularity and its format is already supported by userspace tools such as trace-cmd. It is a natural solution to store events that would come from outside the kernel such as a hypervisor. In that case, where a writer is external to the kernel, the latter would only be responsible for allocating the ring buffer and reading it back. The allocation is done with the newly introduced function which just needs a size and a set of callbacks (notice only the overwrite mode is supported at the moment): ring_buffer_alloc_ext(unsigned long size, struct ring_buffer_ext_cb *cb) The callbacks given to this allocator enable communication with the external writer: (*swap_reader)(int cpu): Ask the writer to swap the current reader page with the head. (*update_footers)(int cpu): Ask the writer to update material in the page footers. Each page from the ring buffer has indeed a footer in which statistics and page status can be retrieved. This allows the kernel to update its view on the ring buffer, following a reader page swap or a footers update. After the trace_buffer is allocated, a helper serializes the relevant information into a structure that can be easily sent to the external writer: trace_buffer_pack(struct trace_buffer *trace_buffer, struct trace_buffer_pack *pack) The footer and pack description can be found in the newly introduced header file include/linux/ring_buffer_ext.h. When the kernel is writing to the ring buffer, it can quite easily wake up the reader. That's not the case when the writer is external. A new function allows polling for reading the ring buffer: ring_buffer_poke(struct trace_buffer *buffer, int cpu) A ring buffer allocated for an external writer will forbid any writing (the whole design of the ring buffer mandates a single writer) and will also prevent extending or extracting pages. 
When I presented this work at the tracingsummit, rosted@ told me he saw some overlap with an idea he had for mapping the tracing buffers in userspace. We designed together a solution that would enable both features. The problem now is that, although the development of the new design has already started, it would nonetheless impose a significant revamp of this patchset, which wouldn't make it into Android14. Nothing is technically wrong with anything here, but sending it to LKML wouldn't make sense, as I already know this isn't as "reusable" as the version agreed upon. Bug: 229972309 Change-Id: Iafcc1e2683a7460c94de3db116878c303601df64 Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
80 lines
2.2 KiB
C
80 lines
2.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_RING_BUFFER_EXT_H
|
|
#define _LINUX_RING_BUFFER_EXT_H
|
|
#include <linux/mm.h>
|
|
#include <linux/types.h>
|
|
|
|
struct rb_ext_stats {
|
|
u64 entries;
|
|
unsigned long pages_touched;
|
|
unsigned long overrun;
|
|
};
|
|
|
|
/*
 * Page status bits, one bit per flag so they can be OR-ed together.
 *
 * NOTE(review): nothing in this header consumes these bits; they are
 * presumably stored in the status words of struct rb_ext_page_footer to mark
 * a page as the head, reader or commit page. Confirm against the users.
 */
#define RB_PAGE_FT_HEAD		(1 << 0)
#define RB_PAGE_FT_READER	(1 << 1)
#define RB_PAGE_FT_COMMIT	(1 << 2)
|
|
|
|
/*
|
|
* The pages where the events are stored are the only shared elements between
|
|
* the reader and the external writer. They are convenient to enable
|
|
* communication from the writer to the reader. The data will be used by the
|
|
* reader to update its view on the ring buffer.
|
|
*/
|
|
struct rb_ext_page_footer {
|
|
atomic_t writer_status;
|
|
atomic_t reader_status;
|
|
struct rb_ext_stats stats;
|
|
};
|
|
|
|
static inline struct rb_ext_page_footer *rb_ext_page_get_footer(void *page)
|
|
{
|
|
struct rb_ext_page_footer *footer;
|
|
unsigned long page_va = (unsigned long)page;
|
|
|
|
page_va = ALIGN_DOWN(page_va, PAGE_SIZE);
|
|
|
|
return (struct rb_ext_page_footer *)(page_va + PAGE_SIZE -
|
|
sizeof(*footer));
|
|
}
|
|
|
|
/*
 * Bytes left in a buffer page once room for struct rb_ext_page_footer has
 * been subtracted from BUF_PAGE_SIZE.
 */
#define BUF_EXT_PAGE_SIZE \
	(BUF_PAGE_SIZE - sizeof(struct rb_ext_page_footer))
|
|
|
|
/*
 * An external writer can't rely on the internal struct ring_buffer_per_cpu.
 * Instead, the relevant information is packed into struct ring_buffer_pack,
 * which can be sent to the writer. The latter can then create its own view
 * of the ring buffer.
 *
 * page_va[] is a flexible array holding nr_pages entries; the size of one
 * pack is therefore offsetof(struct ring_buffer_pack, page_va) +
 * nr_pages * sizeof(unsigned long) (see __next_ring_buffer_pack()).
 *
 * NOTE(review): the "_va" suffix suggests these are virtual addresses of the
 * reader page and of the data pages; confirm against the code that fills
 * the pack.
 */
struct ring_buffer_pack {
	int		cpu;
	unsigned long	reader_page_va;
	unsigned long	nr_pages;	/* number of entries in page_va[] */
	unsigned long	page_va[];
};
|
|
|
|
/*
 * Container for the per-CPU packs sent to the external writer.
 *
 * __data holds nr_cpus variable-sized struct ring_buffer_pack laid out back
 * to back; walk them with for_each_ring_buffer_pack().
 *
 * NOTE(review): total_pages is presumably the sum of nr_pages over all the
 * packs; this header does not use it, so confirm against the code that
 * fills the pack.
 */
struct trace_buffer_pack {
	int		nr_cpus;
	unsigned long	total_pages;
	char		__data[];	/* contains ring_buffer_pack */
};
|
|
|
|
static inline
|
|
struct ring_buffer_pack *__next_ring_buffer_pack(struct ring_buffer_pack *rb_pack)
|
|
{
|
|
size_t len;
|
|
|
|
len = offsetof(struct ring_buffer_pack, page_va) +
|
|
sizeof(unsigned long) * rb_pack->nr_pages;
|
|
|
|
return (struct ring_buffer_pack *)((void *)rb_pack + len);
|
|
}
|
|
|
|
/*
 * for_each_ring_buffer_pack - iterate over the ring_buffer_pack's stored in
 *                             a trace_buffer_pack
 * @rb_pack:    struct ring_buffer_pack * used as the loop cursor
 * @cpu:        integer loop counter, goes from 0 to nr_cpus - 1
 * @trace_pack: struct trace_buffer_pack * to walk
 *
 * @cpu is the position of the pack inside @trace_pack; it is not read from
 * rb_pack->cpu. Every argument is parenthesized so that expressions such as
 * "&pack" or "ptr + 1" can be passed safely, but each one is still evaluated
 * several times, so avoid arguments with side effects.
 */
#define for_each_ring_buffer_pack(rb_pack, cpu, trace_pack)			\
	for ((rb_pack) = (struct ring_buffer_pack *)&(trace_pack)->__data[0],	\
	     (cpu) = 0;								\
	     (cpu) < (trace_pack)->nr_cpus;					\
	     (cpu)++, (rb_pack) = __next_ring_buffer_pack(rb_pack))
|
|
#endif
|