Tracing fixes for 6.6-rc2:

- Fix the "bytes" output of the per_cpu stat file
   The tracefs/per_cpu/cpu*/stats "bytes" was giving bogus values as the
   accounting was not accurate. It is suppose to show how many used bytes are
   still in the ring buffer, but even when the ring buffer was empty it would
   still show there were bytes used.
 
 - Fix a bug in eventfs where reading a dynamic event directory (open) and then
   creating a dynamic event that goes into that diretory screws up the accounting.
   On close, the newly created event dentry will get a "dput" without ever having
   a "dget" done for it. The fix is to allocate an array on dir open to save what
   dentries were actually "dget" on, and what ones to "dput" on close.
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCZQ9wihQccm9zdGVkdEBn
 b29kbWlzLm9yZwAKCRAp5XQQmuv6quz4AP4vSFohvmAcTzC+sKP7gMLUvEmqL76+
 1pixXrQOIP5BrQEApUW3VnjqYgjZJR2ne0N4MvvmYElm/ylBhDd4JRrD3g8=
 =X9wd
 -----END PGP SIGNATURE-----

Merge tag 'trace-v6.6-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

 - Fix the "bytes" output of the per_cpu stat file

   The tracefs/per_cpu/cpu*/stats "bytes" was giving bogus values as the
   accounting was not accurate. It is suppose to show how many used
   bytes are still in the ring buffer, but even when the ring buffer was
   empty it would still show there were bytes used.

 - Fix a bug in eventfs where reading a dynamic event directory (open)
   and then creating a dynamic event that goes into that diretory screws
   up the accounting.

   On close, the newly created event dentry will get a "dput" without
   ever having a "dget" done for it. The fix is to allocate an array on
   dir open to save what dentries were actually "dget" on, and what ones
   to "dput" on close.

* tag 'trace-v6.6-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  eventfs: Remember what dentries were created on dir open
  ring-buffer: Fix bytes info in per_cpu buffer stats
This commit is contained in:
Linus Torvalds 2023-09-24 13:55:34 -07:00
commit 5edc6bb321
2 changed files with 85 additions and 30 deletions

View file

@ -70,6 +70,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
struct dentry *dentry,
unsigned int flags);
static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
static int eventfs_release(struct inode *inode, struct file *file);
static const struct inode_operations eventfs_root_dir_inode_operations = {
@ -79,7 +80,7 @@ static const struct inode_operations eventfs_root_dir_inode_operations = {
static const struct file_operations eventfs_file_operations = {
.open = dcache_dir_open_wrapper,
.read = generic_read_dir,
.iterate_shared = dcache_readdir,
.iterate_shared = dcache_readdir_wrapper,
.llseek = generic_file_llseek,
.release = eventfs_release,
};
@ -396,6 +397,11 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
return ret;
}
struct dentry_list {
void *cursor;
struct dentry **dentries;
};
/**
* eventfs_release - called to release eventfs file/dir
* @inode: inode to be released
@ -404,26 +410,25 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
static int eventfs_release(struct inode *inode, struct file *file)
{
struct tracefs_inode *ti;
struct eventfs_inode *ei;
struct eventfs_file *ef;
struct dentry *dentry;
int idx;
struct dentry_list *dlist = file->private_data;
void *cursor;
int i;
ti = get_tracefs(inode);
if (!(ti->flags & TRACEFS_EVENT_INODE))
return -EINVAL;
ei = ti->private;
idx = srcu_read_lock(&eventfs_srcu);
list_for_each_entry_srcu(ef, &ei->e_top_files, list,
srcu_read_lock_held(&eventfs_srcu)) {
mutex_lock(&eventfs_mutex);
dentry = ef->dentry;
mutex_unlock(&eventfs_mutex);
if (dentry)
dput(dentry);
if (WARN_ON_ONCE(!dlist))
return -EINVAL;
for (i = 0; dlist->dentries[i]; i++) {
dput(dlist->dentries[i]);
}
srcu_read_unlock(&eventfs_srcu, idx);
cursor = dlist->cursor;
kfree(dlist->dentries);
kfree(dlist);
file->private_data = cursor;
return dcache_dir_close(inode, file);
}
@ -442,22 +447,70 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
struct tracefs_inode *ti;
struct eventfs_inode *ei;
struct eventfs_file *ef;
struct dentry_list *dlist;
struct dentry **dentries = NULL;
struct dentry *dentry = file_dentry(file);
struct dentry *d;
struct inode *f_inode = file_inode(file);
int cnt = 0;
int idx;
int ret;
ti = get_tracefs(f_inode);
if (!(ti->flags & TRACEFS_EVENT_INODE))
return -EINVAL;
if (WARN_ON_ONCE(file->private_data))
return -EINVAL;
dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
if (!dlist)
return -ENOMEM;
ei = ti->private;
idx = srcu_read_lock(&eventfs_srcu);
list_for_each_entry_srcu(ef, &ei->e_top_files, list,
srcu_read_lock_held(&eventfs_srcu)) {
create_dentry(ef, dentry, false);
d = create_dentry(ef, dentry, false);
if (d) {
struct dentry **tmp;
tmp = krealloc(dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
if (!tmp)
break;
tmp[cnt] = d;
tmp[cnt + 1] = NULL;
cnt++;
dentries = tmp;
}
}
srcu_read_unlock(&eventfs_srcu, idx);
return dcache_dir_open(inode, file);
ret = dcache_dir_open(inode, file);
/*
* dcache_dir_open() sets file->private_data to a dentry cursor.
* Need to save that but also save all the dentries that were
* opened by this function.
*/
dlist->cursor = file->private_data;
dlist->dentries = dentries;
file->private_data = dlist;
return ret;
}
/*
* This just sets the file->private_data back to the cursor and back.
*/
static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
{
struct dentry_list *dlist = file->private_data;
int ret;
file->private_data = dlist->cursor;
ret = dcache_readdir(file, ctx);
dlist->cursor = file->private_data;
file->private_data = dlist;
return ret;
}
/**

View file

@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage)
local_set(&bpage->commit, 0);
}
static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
{
return local_read(&bpage->page->commit);
}
static void free_buffer_page(struct buffer_page *bpage)
{
free_page((unsigned long)bpage->page);
@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
* Increment overrun to account for the lost events.
*/
local_add(page_entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
}
@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read);
}
static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
{
return local_read(&bpage->page->commit);
}
static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
/*
@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
/* account for padding bytes */
local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
* write counter enough to allow another writer to slip
* in on this page.
* We put in a discarded commit instead, to make sure
* that this space is not used again.
* that this space is not used again, and this space will
* not be accounted into 'entries_bytes'.
*
* If we are less than the minimum size, we don't need to
* worry about it.
@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
/* time delta must be non zero */
event->time_delta = 1;
/* account for padding bytes */
local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
/* Make sure the padding is visible before the tail_page->write update */
smp_wmb();
@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
/**
* ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
* ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to read from.
*/
@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
length = rb_event_length(event);
cpu_buffer->reader_page->read += length;
cpu_buffer->read_bytes += length;
}
static void rb_advance_iter(struct ring_buffer_iter *iter)
@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
cpu_buffer->read_bytes += BUF_PAGE_SIZE;
cpu_buffer->read_bytes += rb_page_commit(reader);
/* swap the pages */
rb_init_page(bpage);