kernel: netlink based system stats interface

Add a generic netlink based interface to pass various
system and per-task statistics to userspace.

Change-Id: I362c696ade2c5a424516728010122fec54a1a71b
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
This commit is contained in:
Vinayak Menon 2020-11-17 16:06:51 +05:30
parent 6c5fba7425
commit 954aeed7b5
4 changed files with 610 additions and 0 deletions

View File

@ -0,0 +1,92 @@
/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#ifndef _UAPI_MSM_SYSSTATS_H_
#define _UAPI_MSM_SYSSTATS_H_
#include <linux/types.h>
/* Generic netlink family name and version used when the family is registered. */
#define SYSSTATS_GENL_NAME "SYSSTATS"
#define SYSSTATS_GENL_VERSION 0x1
/* Size in bytes of the name[] and state[] strings in struct sysstats_task. */
#define TS_COMM_LEN 32
/* Attribute types carried in messages sent from the kernel to userspace. */
#define SYSSTATS_TYPE_UNSPEC 0
#define SYSSTATS_TASK_TYPE_STATS 1 /* struct sysstats_task, reply to a single-pid query */
#define SYSSTATS_TYPE_NULL 2 /* padding attribute used to 64-bit align TASK_TYPE_STATS */
#define SYSSTATS_TASK_TYPE_FOREACH 3 /* struct sysstats_task, one per task in a dump */
#define SYSSTATS_MEMINFO_TYPE_STATS 4 /* struct sysstats_mem */
/* Attribute types accepted in requests sent from userspace. */
#define SYSSTATS_CMD_ATTR_UNSPEC 0
#define SYSSTATS_TASK_CMD_ATTR_PID 1 /* u32: pid of the task to query */
/* u32 oom_score_adj filter for dumps: minimum in bits 0-15, maximum in bits 16-31. */
#define SYSSTATS_TASK_CMD_ATTR_FOREACH 2
/*
 * Commands. The *_GET values are the requests; the *_NEW values are used in
 * the replies to non-dump requests. Messages produced by a TASK_CMD_GET dump
 * carry SYSSTATS_TASK_CMD_GET as their command.
 */
#define SYSSTATS_CMD_UNSPEC 0
#define SYSSTATS_TASK_CMD_GET 1
#define SYSSTATS_TASK_CMD_NEW 2
#define SYSSTATS_MEMINFO_CMD_GET 3
#define SYSSTATS_MEMINFO_CMD_NEW 4
/*
 * Per-task statistics.
 *
 * The structure is zeroed before it is filled. A single-pid query fills the
 * rss, cpu time, pid, uid, ppid, name and state fields. A dump (foreach)
 * reply fills only pid, oom_score and the four *_rss fields; every other
 * field is left at zero.
 */
struct sysstats_task {
__u64 anon_rss; /* KB */
__u64 file_rss; /* KB */
__u64 swap_rss; /* KB */
__u64 shmem_rss; /* KB */
__u64 unreclaimable; /* KB; not written by kernel/msm_sysstats.c, reads as 0 */
__u64 utime; /* User CPU time [usec] */
__u64 stime; /* System CPU time [usec] */
__u64 cutime; /* Cumulative User CPU time [usec] */
__u64 cstime; /* Cumulative System CPU time [usec] */
__s16 oom_score; /* oom_score_adj of the task; set in dump replies only */
__s16 __padding;
__u32 pid;
__u32 uid;
__u32 ppid; /* Parent process ID */
char name[TS_COMM_LEN]; /* Command name */
char state[TS_COMM_LEN]; /* cpuset cgroup path of the task; empty without CONFIG_CPUSETS */
};
/*
 * System-wide memory statistics.
 *
 * All values in KB.
 *
 * The structure is zeroed before it is filled, so fields the kernel does not
 * write read as 0. The per-zone (dma_, normal_, movable_, highmem_) fields
 * and zram_compressed are filled only on kernels built without CONFIG_NUMA.
 */
struct sysstats_mem {
__u64 memtotal;
__u64 misc_reclaimable;
__u64 unreclaimable; /* not written by kernel/msm_sysstats.c, reads as 0 */
__u64 zram_compressed; /* NR_ZSPAGES summed over the populated zones of node 0 */
__u64 swap_used; /* total swap minus free swap */
__u64 swap_total;
__u64 buffer;
__u64 vmalloc_total;
__u64 swapcache;
__u64 slab_reclaimable;
__u64 slab_unreclaimable;
__u64 free_cma;
__u64 file_mapped;
__u64 pagetable;
__u64 kernelstack;
__u64 shmem;
/* Zone named "DMA" */
__u64 dma_nr_free;
__u64 dma_nr_active_anon;
__u64 dma_nr_inactive_anon;
__u64 dma_nr_active_file;
__u64 dma_nr_inactive_file;
/* Zone named "Normal" */
__u64 normal_nr_free;
__u64 normal_nr_active_anon;
__u64 normal_nr_inactive_anon;
__u64 normal_nr_active_file;
__u64 normal_nr_inactive_file;
/* Zone named "Movable" */
__u64 movable_nr_free;
__u64 movable_nr_active_anon;
__u64 movable_nr_inactive_anon;
__u64 movable_nr_active_file;
__u64 movable_nr_inactive_file;
/* Zone named "HighMem" */
__u64 highmem_nr_free;
__u64 highmem_nr_active_anon;
__u64 highmem_nr_inactive_anon;
__u64 highmem_nr_active_file;
__u64 highmem_nr_inactive_file;
};
#endif /* _UAPI_MSM_SYSSTATS_H_ */

View File

@ -567,6 +567,16 @@ config TASKSTATS
Say N if unsure.
config MSM_SYSSTATS
tristate "Export system/task statistics through netlink"
depends on NET
depends on MULTIUSER
help
Export system and task statistics through a generic netlink
interface. Userspace can retrieve these statistics via a netlink
socket.
If unsure, say N
config TASK_DELAY_ACCT
bool "Enable per-task delay accounting"
depends on TASKSTATS

View File

@ -98,6 +98,7 @@ obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MSM_SYSSTATS) += msm_sysstats.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/

507
kernel/msm_sysstats.c Normal file
View File

@ -0,0 +1,507 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/msm_sysstats.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <net/genetlink.h>
#include <linux/atomic.h>
#include <linux/sched/cputime.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
/*
 * Cursor used to walk the thread-group leaders of a pid namespace.
 * next_tgid() advances it; when @task is non-NULL the iterator holds a
 * reference on it, which next_tgid() drops on the following call.
 */
struct tgid_iter {
unsigned int tgid; /* pid number to resume the search from / of @task */
struct task_struct *task; /* current task, or NULL when the walk is over */
};
/* Forward declaration; the family is defined after the ops table below. */
static struct genl_family family;
/* Per-CPU sequence counter used by prepare_reply() when it has no request info. */
static DEFINE_PER_CPU(__u32, sysstats_seqnum);
/* Highest request attribute type (SYSSTATS_TASK_CMD_ATTR_FOREACH). */
#define SYSSTATS_CMD_ATTR_MAX 2
/* Attribute policy applied to SYSSTATS_TASK_CMD_GET requests by sysstats_pre_doit(). */
static const struct nla_policy sysstats_cmd_get_policy[SYSSTATS_CMD_ATTR_MAX + 1] = {
[SYSSTATS_TASK_CMD_ATTR_PID] = { .type = NLA_U32 },
[SYSSTATS_TASK_CMD_ATTR_FOREACH] = { .type = NLA_U32 },};
/*
 * sysstats_pre_doit - validate the attributes of an incoming doit request.
 *
 * Task queries are checked against sysstats_cmd_get_policy; meminfo queries
 * are validated without a policy. Any other command is rejected.
 *
 * Returns the result of nlmsg_validate_deprecated(), or -EINVAL for an
 * unknown command.
 */
static int sysstats_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
			     struct genl_info *info)
{
	const struct nla_policy *cmd_policy;

	if (ops->cmd == SYSSTATS_TASK_CMD_GET)
		cmd_policy = sysstats_cmd_get_policy;
	else if (ops->cmd == SYSSTATS_MEMINFO_CMD_GET)
		cmd_policy = NULL;
	else
		return -EINVAL;

	return nlmsg_validate_deprecated(info->nlhdr, GENL_HDRLEN,
					 SYSSTATS_CMD_ATTR_MAX, cmd_policy,
					 info->extack);
}
/*
 * send_reply - finalize the generic netlink message in @skb and send it to
 * the requester described by @info.
 *
 * Returns the result of genlmsg_reply().
 */
static int send_reply(struct sk_buff *skb, struct genl_info *info)
{
	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	struct genlmsghdr *ghdr = nlmsg_data(nlh);

	/* genlmsg_end() is given the payload pointer that follows the genl header. */
	genlmsg_end(skb, genlmsg_data(ghdr));

	return genlmsg_reply(skb, info);
}
/*
 * prepare_reply - allocate a reply skb and write its generic netlink header.
 * @info: request being answered, or NULL for a message that is not a reply
 * @cmd:  command to place in the header
 * @skbp: on success, receives the new skb (owned by the caller)
 * @size: payload size passed to genlmsg_new()
 *
 * Without @info the header uses port id 0 and a sequence number taken from
 * the per-CPU sysstats_seqnum counter.
 *
 * Returns 0 on success, -ENOMEM if the skb cannot be allocated, or -EINVAL
 * if the header cannot be added (the skb is freed in that case).
 */
static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
			 size_t size)
{
	struct sk_buff *msg = genlmsg_new(size, GFP_KERNEL);
	void *hdr;

	if (!msg)
		return -ENOMEM;

	if (info) {
		hdr = genlmsg_put_reply(msg, info, &family, 0, cmd);
	} else {
		int seq = this_cpu_inc_return(sysstats_seqnum) - 1;

		hdr = genlmsg_put(msg, 0, seq, &family, 0, cmd);
	}

	if (!hdr) {
		nlmsg_free(msg);
		return -EINVAL;
	}

	*skbp = msg;
	return 0;
}
static struct task_struct *find_lock_task_mm(struct task_struct *p)
{
struct task_struct *t;
rcu_read_lock();
for_each_thread(p, t) {
task_lock(t);
if (likely(t->mm))
goto found;
task_unlock(t);
}
t = NULL;
found:
rcu_read_unlock();
return t;
}
/*
 * sysstats_lock_task_sighand - lock @tsk's sighand->siglock with IRQs saved.
 * @tsk:   task whose sighand is to be locked
 * @flags: receives the saved IRQ state for the matching unlock
 *
 * Returns the locked sighand_struct, or NULL if @tsk has no sighand (in which
 * case no lock is held). The caller in this file releases the lock with
 * unlock_task_sighand().
 */
static struct sighand_struct *sysstats_lock_task_sighand(struct task_struct *tsk,
unsigned long *flags)
{
struct sighand_struct *sighand;
rcu_read_lock();
for (;;) {
sighand = rcu_dereference(tsk->sighand);
if (unlikely(sighand == NULL))
break;
spin_lock_irqsave(&sighand->siglock, *flags);
/* Re-check after locking: retry if tsk->sighand changed in the meantime. */
if (likely(sighand == tsk->sighand))
break;
spin_unlock_irqrestore(&sighand->siglock, *flags);
}
rcu_read_unlock();
return sighand;
}
static int sysstats_task_cmd_attr_pid(struct genl_info *info)
{
struct sysstats_task *stats;
struct sk_buff *rep_skb;
struct nlattr *ret;
struct task_struct *tsk;
struct task_struct *p;
size_t size;
u32 pid;
int rc;
u64 utime, stime;
const struct cred *tcred;
#ifdef CONFIG_CPUSETS
struct cgroup_subsys_state *css;
#endif
unsigned long flags;
struct signal_struct *sig;
size = nla_total_size_64bit(sizeof(struct sysstats_task));
rc = prepare_reply(info, SYSSTATS_TASK_CMD_NEW, &rep_skb, size);
if (rc < 0)
return rc;
rc = -EINVAL;
pid = nla_get_u32(info->attrs[SYSSTATS_TASK_CMD_ATTR_PID]);
ret = nla_reserve_64bit(rep_skb, SYSSTATS_TASK_TYPE_STATS,
sizeof(struct sysstats_task), SYSSTATS_TYPE_NULL);
if (!ret)
goto err;
stats = nla_data(ret);
rcu_read_lock();
tsk = find_task_by_vpid(pid);
if (tsk)
get_task_struct(tsk);
rcu_read_unlock();
if (!tsk) {
rc = -ESRCH;
goto err;
}
memset(stats, 0, sizeof(*stats));
stats->pid = task_pid_nr_ns(tsk, task_active_pid_ns(current));
p = find_lock_task_mm(tsk);
if (p) {
__acquire(p->alloc_lock);
#define K(x) ((x) << (PAGE_SHIFT - 10))
stats->anon_rss = K(get_mm_counter(p->mm, MM_ANONPAGES));
stats->file_rss = K(get_mm_counter(p->mm, MM_FILEPAGES));
stats->shmem_rss = K(get_mm_counter(p->mm, MM_SHMEMPAGES));
stats->swap_rss = K(get_mm_counter(p->mm, MM_SWAPENTS));
#undef K
task_unlock(p);
}
task_cputime(tsk, &utime, &stime);
stats->utime = div_u64(utime, NSEC_PER_USEC);
stats->stime = div_u64(stime, NSEC_PER_USEC);
if (sysstats_lock_task_sighand(tsk, &flags)) {
sig = tsk->signal;
stats->cutime = sig->cutime;
stats->cstime = sig->cstime;
unlock_task_sighand(tsk, &flags);
}
rcu_read_lock();
tcred = __task_cred(tsk);
stats->uid = from_kuid_munged(current_user_ns(), tcred->uid);
stats->ppid = pid_alive(tsk) ?
task_tgid_nr_ns(rcu_dereference(tsk->real_parent),
task_active_pid_ns(current)) : 0;
rcu_read_unlock();
strlcpy(stats->name, tsk->comm, sizeof(stats->name));
#ifdef CONFIG_CPUSETS
css = task_get_css(tsk, cpuset_cgrp_id);
cgroup_path_ns(css->cgroup, stats->state, sizeof(stats->state),
current->nsproxy->cgroup_ns);
css_put(css);
#endif
put_task_struct(tsk);
return send_reply(rep_skb, info);
err:
nlmsg_free(rep_skb);
return rc;
}
/*
 * sysstats_task_user_cmd - doit handler for SYSSTATS_TASK_CMD_GET.
 *
 * Only pid-based queries are supported: returns -EINVAL when the request has
 * no SYSSTATS_TASK_CMD_ATTR_PID attribute, otherwise the result of
 * sysstats_task_cmd_attr_pid().
 */
static int sysstats_task_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	if (!info->attrs[SYSSTATS_TASK_CMD_ATTR_PID])
		return -EINVAL;

	return sysstats_task_cmd_attr_pid(info);
}
/*
 * next_tgid - advance @iter to the next thread-group leader in @ns.
 *
 * Drops the reference held on the previous iter.task (if any), then searches
 * the namespace's pid idr starting at iter.tgid for the next pid that has a
 * PIDTYPE_TGID task attached. Pids without such a task are skipped.
 *
 * Returns the updated iterator. iter.task is the task found, with a
 * reference taken, or NULL when there are no more entries.
 */
static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
{
	struct pid *pid;

	if (iter.task)
		put_task_struct(iter.task);

	rcu_read_lock();
	for (;;) {
		iter.task = NULL;
		pid = idr_get_next(&ns->idr, &iter.tgid);
		if (!pid)
			break;

		iter.tgid = pid_nr_ns(pid, ns);
		iter.task = pid_task(pid, PIDTYPE_TGID);
		if (iter.task) {
			get_task_struct(iter.task);
			break;
		}

		/* Not a thread-group leader: resume the search at the next pid. */
		iter.tgid += 1;
	}
	rcu_read_unlock();

	return iter;
}
static int sysstats_task_foreach(struct sk_buff *skb, struct netlink_callback *cb)
{
struct pid_namespace *ns = task_active_pid_ns(current);
struct tgid_iter iter;
void *reply;
struct nlattr *attr;
struct nlattr *nla;
struct sysstats_task *stats;
struct task_struct *p;
short oom_score;
short oom_score_min;
short oom_score_max;
u32 buf;
nla = nla_find(nlmsg_attrdata(cb->nlh, GENL_HDRLEN),
nlmsg_attrlen(cb->nlh, GENL_HDRLEN),
SYSSTATS_TASK_CMD_ATTR_FOREACH);
if (!nla)
goto out;
buf = nla_get_u32(nla);
oom_score_min = (short) (buf & 0xFFFF);
oom_score_max = (short) ((buf >> 16) & 0xFFFF);
iter.tgid = cb->args[0];
iter.task = NULL;
for (iter = next_tgid(ns, iter); iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
if (iter.task->flags & PF_KTHREAD)
continue;
oom_score = iter.task->signal->oom_score_adj;
if ((oom_score < oom_score_min)
|| (oom_score > oom_score_max))
continue;
reply = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, &family, 0, SYSSTATS_TASK_CMD_GET);
if (reply == NULL) {
put_task_struct(iter.task);
break;
}
attr = nla_reserve(skb, SYSSTATS_TASK_TYPE_FOREACH,
sizeof(struct sysstats_task));
if (!attr) {
put_task_struct(iter.task);
genlmsg_cancel(skb, reply);
break;
}
stats = nla_data(attr);
memset(stats, 0, sizeof(struct sysstats_task));
rcu_read_lock();
stats->pid = task_pid_nr_ns(iter.task,
task_active_pid_ns(current));
stats->oom_score = iter.task->signal->oom_score_adj;
rcu_read_unlock();
p = find_lock_task_mm(iter.task);
if (p) {
#define K(x) ((x) << (PAGE_SHIFT - 10))
__acquire(p->alloc_lock);
stats->anon_rss =
K(get_mm_counter(p->mm, MM_ANONPAGES));
stats->file_rss =
K(get_mm_counter(p->mm, MM_FILEPAGES));
stats->shmem_rss =
K(get_mm_counter(p->mm, MM_SHMEMPAGES));
stats->swap_rss =
K(get_mm_counter(p->mm, MM_SWAPENTS));
task_unlock(p);
#undef K
}
genlmsg_end(skb, reply);
}
cb->args[0] = iter.tgid;
out:
return skb->len;
}
#define K(x) ((x) << (PAGE_SHIFT - 10))
#ifndef CONFIG_NUMA
static void sysstats_fill_zoneinfo(struct sysstats_mem *stats)
{
pg_data_t *pgdat;
struct zone *zone;
struct zone *node_zones;
unsigned long zspages = 0;
pgdat = NODE_DATA(0);
node_zones = pgdat->node_zones;
for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
if (!populated_zone(zone))
continue;
zspages += zone_page_state(zone, NR_ZSPAGES);
if (!strcmp(zone->name, "DMA")) {
stats->dma_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->dma_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->dma_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->dma_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->dma_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
} else if (!strcmp(zone->name, "Normal")) {
stats->normal_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->normal_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->normal_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->normal_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->normal_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
} else if (!strcmp(zone->name, "HighMem")) {
stats->highmem_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->highmem_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->highmem_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->highmem_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->highmem_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
} else if (!strcmp(zone->name, "Movable")) {
stats->movable_nr_free =
K(zone_page_state(zone, NR_FREE_PAGES));
stats->movable_nr_active_anon =
K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON));
stats->movable_nr_inactive_anon =
K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON));
stats->movable_nr_active_file =
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE));
stats->movable_nr_inactive_file =
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE));
}
}
stats->zram_compressed = K(zspages);
}
#elif
static void sysstats_fill_zoneinfo(struct sysstats_mem *stats)
{
}
#endif
/*
 * sysstats_build - fill @stats with a system-wide memory snapshot, in KB.
 *
 * Page-based counters are converted to KB with K(). The caller is expected
 * to have zeroed @stats beforehand: fields not assigned here or in
 * sysstats_fill_zoneinfo() (for example ->unreclaimable) are left untouched.
 */
static void sysstats_build(struct sysstats_mem *stats)
{
	struct sysinfo i;

	si_meminfo(&i);
	si_swapinfo(&i);

	stats->memtotal = K(i.totalram);
	/*
	 * This counter is a number of pages like the ones below, so it needs
	 * the same pages-to-KB conversion rather than a right shift by 10.
	 */
	stats->misc_reclaimable =
		K(global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
	stats->buffer = K(i.bufferram);
	stats->swap_used = K(i.totalswap - i.freeswap);
	stats->swap_total = K(i.totalswap);
	stats->vmalloc_total = K(vmalloc_nr_pages());
	stats->swapcache = K(total_swapcache_pages());
	stats->slab_reclaimable =
		K(global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B));
	stats->slab_unreclaimable =
		K(global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B));
	stats->free_cma = K(global_zone_page_state(NR_FREE_CMA_PAGES));
	stats->file_mapped = K(global_node_page_state(NR_FILE_MAPPED));
	/* NR_KERNEL_STACK_KB is already accounted in KB: no conversion. */
	stats->kernelstack = global_node_page_state(NR_KERNEL_STACK_KB);
	stats->pagetable = K(global_zone_page_state(NR_PAGETABLE));
	stats->shmem = K(i.sharedram);

	sysstats_fill_zoneinfo(stats);
}
#undef K
/*
 * sysstats_meminfo_user_cmd - doit handler for SYSSTATS_MEMINFO_CMD_GET.
 *
 * Replies with a SYSSTATS_MEMINFO_CMD_NEW message carrying one zeroed and
 * then populated struct sysstats_mem in a SYSSTATS_MEMINFO_TYPE_STATS
 * attribute.
 *
 * Returns the result of send_reply() on success, the error from
 * prepare_reply(), or -EMSGSIZE if the attribute does not fit in the reply.
 */
static int sysstats_meminfo_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	struct sysstats_mem *mem;
	struct nlattr *attr;
	int err;

	err = prepare_reply(info, SYSSTATS_MEMINFO_CMD_NEW, &msg,
			    nla_total_size(sizeof(*mem)));
	if (err < 0)
		return err;

	attr = nla_reserve(msg, SYSSTATS_MEMINFO_TYPE_STATS, sizeof(*mem));
	if (!attr) {
		nlmsg_free(msg);
		return -EMSGSIZE;
	}

	mem = nla_data(attr);
	memset(mem, 0, sizeof(*mem));
	sysstats_build(mem);

	return send_reply(msg, info);
}
/*
 * Operations exposed by the family.
 *
 * SYSSTATS_TASK_CMD_GET supports both a single request (doit, per-pid query)
 * and a dump (dumpit, iteration over tasks). SYSSTATS_MEMINFO_CMD_GET is
 * request-only. Attributes of doit requests are checked in
 * sysstats_pre_doit(), which is installed in the family below.
 */
static const struct genl_ops sysstats_ops[] = {
{
.cmd = SYSSTATS_TASK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = sysstats_task_user_cmd,
.dumpit = sysstats_task_foreach,
},
{
.cmd = SYSSTATS_MEMINFO_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = sysstats_meminfo_user_cmd,
},
};
/*
 * Generic netlink family definition: name and version come from the UAPI
 * header, the ops are sysstats_ops, and sysstats_pre_doit() runs before
 * every doit handler.
 */
static struct genl_family family __ro_after_init = {
.name = SYSSTATS_GENL_NAME,
.version = SYSSTATS_GENL_VERSION,
.maxattr = SYSSTATS_CMD_ATTR_MAX,
.module = THIS_MODULE,
.ops = sysstats_ops,
.n_ops = ARRAY_SIZE(sysstats_ops),
.pre_doit = sysstats_pre_doit,
};
/*
 * sysstats_init - module entry point: register the generic netlink family.
 *
 * Logs the family version on success. Returns 0, or the error reported by
 * genl_register_family().
 */
static int __init sysstats_init(void)
{
	int err = genl_register_family(&family);

	if (!err)
		pr_info("registered sysstats version %d\n", SYSSTATS_GENL_VERSION);

	return err;
}
/*
 * sysstats_exit - module exit point: unregister the generic netlink family.
 * The return value of genl_unregister_family() is not checked.
 */
static void __exit sysstats_exit(void)
{
genl_unregister_family(&family);
}
module_init(sysstats_init);
module_exit(sysstats_exit);
MODULE_LICENSE("GPL v2");