For CMA allocation, it's really critical to migrate a page but sometimes it fails. One of the reasons is some driver holds a page refcount for a long time so VM couldn't migrate the page at that time. The concern here is there is no way to find the who hold the refcount of the page effectively. This patch introduces feature to keep tracking page's pinner. All get_page sites are vulnerable to pin a page for a long time but the cost to keep track it would be significat since get_page is the most frequent kernel operation. Furthermore, the page could be not user page but kernel page which is not related to the page migration failure. So, this patch keeps tracking only get_user_pages/follow_page with (FOLL_GET|PIN friends because they are the very common APIs to pin user pages which could cause migration failure and the less frequent than get_page so runtime cost wouldn't be that big but could cover many cases effectively. This patch also introduces put_user_page API. It aims for attributing "the pinner releases the page from now on" while it release the page refcount. Thus, any user of get_user_pages/follow_page(FOLL_GET) must use put_user_page as pair of those functions. Otherwise, page_pinner will treat them long term pinner as false postive but nothing should affect stability. * $debugfs/page_pinner/threshold It indicates threshold(microsecond) to flag long term pinning. It's configurable(Default is 300000us). Once you write new value to the threshold, old data will clear. * $debugfs/page_pinner/longterm_pinner It shows call sites where the duration of pinning was greater than the threshold. Internally, it uses a static array to keep 4096 elements and overwrites old ones once overflow happens. Therefore, you could lose some information. example) Page pinned ts 76953865787 us count 1 PFN 9856945 Block 9625 type Movable Flags 0x8000000000080014(uptodate|lru|swapbacked) __set_page_pinner+0x34/0xcc try_grab_page+0x19c/0x1a0 follow_page_pte+0x1c0/0x33c follow_page_mask+0xc0/0xc8 __get_user_pages+0x178/0x414 __gup_longterm_locked+0x80/0x148 internal_get_user_pages_fast+0x140/0x174 pin_user_pages_fast+0x24/0x40 CCC BBB AAA __arm64_sys_ioctl+0x94/0xd0 el0_svc_common+0xa4/0x180 do_el0_svc+0x28/0x88 el0_svc+0x14/0x24 note: page_pinner doesn't guarantee attributing/unattributing are atomic if they happen at the same time. It's just best effort so false-positive could happen. Bug: 183414571 Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Minchan Kim <minchan@google.com> Change-Id: Ife37ec360eef993d390b9c131732218a4dfd2f04
127 lines
4.5 KiB
Makefile
127 lines
4.5 KiB
Makefile
# SPDX-License-Identifier: GPL-2.0
|
|
#
|
|
# Makefile for the linux memory manager.
|
|
#
|
|
|
|
KASAN_SANITIZE_slab_common.o := n
|
|
KASAN_SANITIZE_slab.o := n
|
|
KASAN_SANITIZE_slub.o := n
|
|
KCSAN_SANITIZE_kmemleak.o := n
|
|
|
|
# These produce frequent data race reports: most of them are due to races on
|
|
# the same word but accesses to different bits of that word. Re-enable KCSAN
|
|
# for these when we have more consensus on what to do about them.
|
|
KCSAN_SANITIZE_slab_common.o := n
|
|
KCSAN_SANITIZE_slab.o := n
|
|
KCSAN_SANITIZE_slub.o := n
|
|
KCSAN_SANITIZE_page_alloc.o := n
|
|
|
|
# These files are disabled because they produce non-interesting and/or
|
|
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
|
|
# free pages, or a task is migrated between nodes.
|
|
KCOV_INSTRUMENT_slab_common.o := n
|
|
KCOV_INSTRUMENT_slob.o := n
|
|
KCOV_INSTRUMENT_slab.o := n
|
|
KCOV_INSTRUMENT_slub.o := n
|
|
KCOV_INSTRUMENT_page_alloc.o := n
|
|
KCOV_INSTRUMENT_debug-pagealloc.o := n
|
|
KCOV_INSTRUMENT_kmemleak.o := n
|
|
KCOV_INSTRUMENT_memcontrol.o := n
|
|
KCOV_INSTRUMENT_mmzone.o := n
|
|
KCOV_INSTRUMENT_vmstat.o := n
|
|
KCOV_INSTRUMENT_failslab.o := n
|
|
|
|
CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
|
|
CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
|
|
|
|
mmu-y := nommu.o
|
|
mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
|
|
mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
|
|
msync.o page_vma_mapped.o pagewalk.o \
|
|
pgtable-generic.o rmap.o vmalloc.o ioremap.o
|
|
|
|
|
|
ifdef CONFIG_CROSS_MEMORY_ATTACH
|
|
mmu-$(CONFIG_MMU) += process_vm_access.o
|
|
endif
|
|
|
|
obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
|
|
maccess.o page-writeback.o \
|
|
readahead.o swap.o truncate.o vmscan.o shmem.o \
|
|
util.o mmzone.o vmstat.o backing-dev.o \
|
|
mm_init.o percpu.o slab_common.o \
|
|
compaction.o vmacache.o \
|
|
interval_tree.o list_lru.o workingset.o \
|
|
debug.o gup.o $(mmu-y)
|
|
|
|
# Give 'page_alloc' its own module-parameter namespace
|
|
page-alloc-y := page_alloc.o
|
|
page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
|
|
CFLAGS_page_alloc.o += -DDYNAMIC_DEBUG_MODULE
|
|
|
|
obj-y += page-alloc.o
|
|
obj-y += init-mm.o
|
|
obj-y += memblock.o
|
|
|
|
ifdef CONFIG_MMU
|
|
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
|
|
endif
|
|
|
|
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o
|
|
obj-$(CONFIG_FRONTSWAP) += frontswap.o
|
|
obj-$(CONFIG_ZSWAP) += zswap.o
|
|
obj-$(CONFIG_HAS_DMA) += dmapool.o
|
|
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
|
|
obj-$(CONFIG_NUMA) += mempolicy.o
|
|
obj-$(CONFIG_SPARSEMEM) += sparse.o
|
|
obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
|
|
obj-$(CONFIG_SLOB) += slob.o
|
|
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
|
|
obj-$(CONFIG_KSM) += ksm.o
|
|
obj-$(CONFIG_PAGE_POISONING) += page_poison.o
|
|
obj-$(CONFIG_SLAB) += slab.o
|
|
obj-$(CONFIG_SLUB) += slub.o
|
|
obj-$(CONFIG_KASAN) += kasan/
|
|
obj-$(CONFIG_KFENCE) += kfence/
|
|
obj-$(CONFIG_FAILSLAB) += failslab.o
|
|
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
|
|
obj-$(CONFIG_MEMTEST) += memtest.o
|
|
obj-$(CONFIG_MIGRATION) += migrate.o
|
|
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
|
|
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
|
|
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
|
|
obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
|
|
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
|
|
obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o
|
|
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
|
|
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
|
|
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
|
|
obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o
|
|
obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o
|
|
obj-$(CONFIG_PAGE_OWNER) += page_owner.o
|
|
obj-$(CONFIG_PAGE_PINNER) += page_pinner.o
|
|
obj-$(CONFIG_CLEANCACHE) += cleancache.o
|
|
obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
|
|
obj-$(CONFIG_ZPOOL) += zpool.o
|
|
obj-$(CONFIG_ZBUD) += zbud.o
|
|
obj-$(CONFIG_ZSMALLOC) += zsmalloc.o
|
|
obj-$(CONFIG_Z3FOLD) += z3fold.o
|
|
obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
|
|
obj-$(CONFIG_CMA) += cma.o
|
|
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
|
|
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
|
|
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
|
|
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
|
|
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
|
|
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
|
|
obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
|
|
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
|
|
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
|
|
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
|
|
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
|
|
obj-$(CONFIG_HMM_MIRROR) += hmm.o
|
|
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
|
|
obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
|
|
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
|
|
obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
|