android_kernel_samsung_sm8650/mm/page_pinner.c

435 lines
9.6 KiB
C
Raw Permalink Normal View History

ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memblock.h>
#include <linux/stacktrace.h>
#include <linux/page_pinner.h>
#include <linux/jump_label.h>
#include <linux/migrate.h>
#include <linux/stackdepot.h>
#include <linux/seq_file.h>
#include <linux/sched/clock.h>
#include "internal.h"
#define PAGE_PINNER_STACK_DEPTH 16
static unsigned long pp_buf_size = 4096;
struct page_pinner {
depot_stack_handle_t handle;
u64 ts_usec;
atomic_t count;
};
enum pp_state {
PP_PUT,
PP_FREE,
PP_FAIL_DETECTED,
};
struct captured_pinner {
depot_stack_handle_t handle;
union {
u64 ts_usec;
u64 elapsed;
};
/* struct page fields */
unsigned long pfn;
int count;
int mapcount;
struct address_space *mapping;
unsigned long flags;
enum pp_state state;
};
struct page_pinner_buffer {
spinlock_t lock;
unsigned long index;
struct captured_pinner *buffer;
};
/* alloc_contig failed pinner */
static struct page_pinner_buffer pp_buffer;
static bool page_pinner_enabled;
DEFINE_STATIC_KEY_FALSE(page_pinner_inited);
EXPORT_SYMBOL_GPL(page_pinner_inited);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
DEFINE_STATIC_KEY_TRUE(failure_tracking);
static depot_stack_handle_t failure_handle;
static int __init early_page_pinner_param(char *buf)
{
page_pinner_enabled = true;
return 0;
}
early_param("page_pinner", early_page_pinner_param);
static bool need_page_pinner(void)
{
return page_pinner_enabled;
}
static noinline void register_failure_stack(void)
{
unsigned long entries[4];
unsigned int nr_entries;
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
failure_handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
}
static void init_page_pinner(void)
{
if (!page_pinner_enabled)
return;
pp_buffer.buffer = kvmalloc_array(pp_buf_size, sizeof(*pp_buffer.buffer),
GFP_KERNEL);
if (!pp_buffer.buffer) {
pr_info("page_pinner disabled due to failure of buffer allocation\n");
return;
}
spin_lock_init(&pp_buffer.lock);
pp_buffer.index = 0;
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
register_failure_stack();
static_branch_enable(&page_pinner_inited);
}
struct page_ext_operations page_pinner_ops = {
.size = sizeof(struct page_pinner),
.need = need_page_pinner,
.init = init_page_pinner,
};
static inline struct page_pinner *get_page_pinner(struct page_ext *page_ext)
{
return (void *)page_ext + page_pinner_ops.offset;
}
static noinline depot_stack_handle_t save_stack(gfp_t flags)
{
unsigned long entries[PAGE_PINNER_STACK_DEPTH];
depot_stack_handle_t handle;
unsigned int nr_entries;
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
handle = stack_depot_save(entries, nr_entries, flags);
if (!handle)
handle = failure_handle;
return handle;
}
static void capture_page_state(struct page *page,
struct captured_pinner *record)
{
record->flags = page->flags;
record->mapping = page_mapping(page);
record->pfn = page_to_pfn(page);
record->count = page_count(page);
record->mapcount = page_mapcount(page);
}
static void add_record(struct page_pinner_buffer *pp_buf,
struct captured_pinner *record)
{
unsigned long flags;
unsigned int idx;
spin_lock_irqsave(&pp_buf->lock, flags);
idx = pp_buf->index++;
pp_buf->index %= pp_buf_size;
pp_buf->buffer[idx] = *record;
spin_unlock_irqrestore(&pp_buf->lock, flags);
}
void __free_page_pinner(struct page *page, unsigned int order)
{
struct page_pinner *page_pinner;
struct page_ext *page_ext;
int i;
/* free_page could be called before buffer is initialized */
if (!pp_buffer.buffer)
return;
page_ext = page_ext_get(page);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
if (unlikely(!page_ext))
return;
for (i = 0; i < (1 << order); i++) {
struct captured_pinner record;
if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags))
continue;
page_pinner = get_page_pinner(page_ext);
record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
record.ts_usec = (u64)ktime_to_us(ktime_get_boottime());
record.state = PP_FREE;
capture_page_state(page, &record);
add_record(&pp_buffer, &record);
atomic_set(&page_pinner->count, 0);
page_pinner->ts_usec = 0;
clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
page_ext = page_ext_next(page_ext);
}
page_ext_put(page_ext);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
}
static ssize_t
print_page_pinner(char __user *buf, size_t count, struct captured_pinner *record)
{
int ret;
unsigned long *entries;
unsigned int nr_entries;
char *kbuf;
count = min_t(size_t, count, PAGE_SIZE);
kbuf = kmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
if (record->state == PP_PUT) {
ret = snprintf(kbuf, count, "At least, pinned for %llu us\n",
record->elapsed);
} else {
u64 ts_usec = record->ts_usec;
unsigned long rem_usec = do_div(ts_usec, 1000000);
ret = snprintf(kbuf, count,
"%s [%5lu.%06lu]\n",
record->state == PP_FREE ? "Freed at" :
"Failure detected at",
(unsigned long)ts_usec, rem_usec);
}
if (ret >= count)
goto err;
/* Print information relevant to grouping pages by mobility */
ret += snprintf(kbuf + ret, count - ret,
"PFN 0x%lx Block %lu count %d mapcount %d mapping %pS Flags %#lx(%pGp)\n",
record->pfn,
record->pfn >> pageblock_order,
record->count, record->mapcount,
record->mapping,
record->flags, &record->flags);
if (ret >= count)
goto err;
nr_entries = stack_depot_fetch(record->handle, &entries);
ret += stack_trace_snprint(kbuf + ret, count - ret, entries,
nr_entries, 0);
if (ret >= count)
goto err;
ret += snprintf(kbuf + ret, count - ret, "\n");
if (ret >= count)
goto err;
if (copy_to_user(buf, kbuf, ret))
ret = -EFAULT;
kfree(kbuf);
return ret;
err:
kfree(kbuf);
return -ENOMEM;
}
void __page_pinner_failure_detect(struct page *page)
{
struct page_ext *page_ext;
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
struct page_pinner *page_pinner;
struct captured_pinner record;
u64 now;
if (!static_branch_unlikely(&failure_tracking))
return;
page_ext = page_ext_get(page);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
if (unlikely(!page_ext))
return;
if (test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) {
page_ext_put(page_ext);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
return;
}
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
now = (u64)ktime_to_us(ktime_get_boottime());
page_pinner = get_page_pinner(page_ext);
if (!page_pinner->ts_usec)
page_pinner->ts_usec = now;
set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags);
record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
record.ts_usec = now;
record.state = PP_FAIL_DETECTED;
capture_page_state(page, &record);
add_record(&pp_buffer, &record);
page_ext_put(page_ext);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
}
EXPORT_SYMBOL_GPL(__page_pinner_failure_detect);
void __page_pinner_put_page(struct page *page)
{
struct page_ext *page_ext;
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
struct page_pinner *page_pinner;
struct captured_pinner record;
u64 now, ts_usec;
if (!static_branch_unlikely(&failure_tracking))
return;
page_ext = page_ext_get(page);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
if (unlikely(!page_ext))
return;
if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) {
page_ext_put(page_ext);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
return;
}
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
page_pinner = get_page_pinner(page_ext);
record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN);
now = (u64)ktime_to_us(ktime_get_boottime());
ts_usec = page_pinner->ts_usec;
if (now > ts_usec)
record.elapsed = now - ts_usec;
else
record.elapsed = 0;
record.state = PP_PUT;
capture_page_state(page, &record);
add_record(&pp_buffer, &record);
page_ext_put(page_ext);
ANDROID: mm: introduce page_pinner For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. Thus, this patch keeps tracks of only migration failed pages to reduce runtime cost. Once page migration fails in CMA allocation path, those pages are marked as "migration failure" and every put_page operation against those pages, callstack of the put are recorded into page_pinner buffer. Later, admin can see what pages were failed and who released the refcount since the failure. It really helps effectively to find out longtime refcount holder to prevent the page migration. note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 BUg: 240196534 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: I603d0c0122734c377db6b1eb95848a6f734173a0 (cherry picked from commit 898cfbf094a2fc13c67fab5b5d3c916f0139833a)
2022-09-21 06:42:50 +09:00
}
EXPORT_SYMBOL_GPL(__page_pinner_put_page);
static ssize_t read_buffer(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
u64 tmp;
loff_t i, idx;
struct captured_pinner record;
unsigned long flags;
if (!static_branch_unlikely(&failure_tracking))
return -EINVAL;
if (*ppos >= pp_buf_size)
return 0;
i = *ppos;
*ppos = i + 1;
/*
* reading the records in the reverse order with newest one
* being read first followed by older ones
*/
tmp = pp_buffer.index - 1 - i + pp_buf_size;
idx = do_div(tmp, pp_buf_size);
spin_lock_irqsave(&pp_buffer.lock, flags);
record = pp_buffer.buffer[idx];
spin_unlock_irqrestore(&pp_buffer.lock, flags);
if (!record.handle)
return 0;
return print_page_pinner(buf, count, &record);
}
static const struct file_operations proc_buffer_operations = {
.read = read_buffer,
};
static int failure_tracking_set(void *data, u64 val)
{
bool on;
on = (bool)val;
if (on)
static_branch_enable(&failure_tracking);
else
static_branch_disable(&failure_tracking);
return 0;
}
static int failure_tracking_get(void *data, u64 *val)
{
*val = static_branch_unlikely(&failure_tracking);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops,
failure_tracking_get,
failure_tracking_set, "%llu\n");
static int buffer_size_set(void *data, u64 val)
{
unsigned long flags;
struct captured_pinner *new, *old;
new = kvmalloc_array(val, sizeof(*new), GFP_KERNEL);
if (!new)
return -ENOMEM;
spin_lock_irqsave(&pp_buffer.lock, flags);
old = pp_buffer.buffer;
pp_buffer.buffer = new;
pp_buffer.index = 0;
pp_buf_size = val;
spin_unlock_irqrestore(&pp_buffer.lock, flags);
kvfree(old);
return 0;
}
static int buffer_size_get(void *data, u64 *val)
{
*val = pp_buf_size;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(buffer_size_fops,
buffer_size_get,
buffer_size_set, "%llu\n");
static int __init page_pinner_init(void)
{
struct dentry *pp_debugfs_root;
if (!static_branch_unlikely(&page_pinner_inited))
return 0;
pr_info("page_pinner enabled\n");
pp_debugfs_root = debugfs_create_dir("page_pinner", NULL);
debugfs_create_file("buffer", 0444,
pp_debugfs_root, NULL,
&proc_buffer_operations);
debugfs_create_file("failure_tracking", 0644,
pp_debugfs_root, NULL,
&failure_tracking_fops);
debugfs_create_file("buffer_size", 0644,
pp_debugfs_root, NULL,
&buffer_size_fops);
return 0;
}
late_initcall(page_pinner_init)