Merge 5154e60796 ("mm/swap: cache swap migration A/D bits support") into android-mainline

Steps on the way to 6.1-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I323198adbfd3a7009a23ca058a5a592786ee7737
This commit is contained in:
Greg Kroah-Hartman 2022-10-20 11:56:39 +02:00
commit 3956baa5d0
23 changed files with 339 additions and 98 deletions

View File

@ -245,7 +245,7 @@ static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
{
VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
return page_folio(pfn_to_page(swp_offset(entry)));
return page_folio(pfn_to_page(swp_offset_pfn(entry)));
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,

View File

@ -256,10 +256,10 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
/* We always extract/encode the offset by shifting it all the way up, and then down again */
#define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
#define __swp_offset(x) ((x).val >> 5)
#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
#define __swp_type(x) (((x).val) & ((1UL << SWP_TYPE_BITS) - 1))
#define __swp_offset(x) ((x).val >> SWP_TYPE_BITS)
#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS})
/*
* Normally, __swp_entry() converts from arch-independent swp_entry_t to

View File

@ -1054,7 +1054,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
}
#ifdef CONFIG_SWAP
unsigned long max_swapfile_size(void)
unsigned long arch_max_swapfile_size(void)
{
unsigned long pages;

View File

@ -1418,9 +1418,19 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pte_swp_uffd_wp(pte))
flags |= PM_UFFD_WP;
entry = pte_to_swp_entry(pte);
if (pm->show_pfn)
if (pm->show_pfn) {
pgoff_t offset;
/*
* For PFN swap offsets, keeping the offset field
* to be PFN only to be compatible with old smaps.
*/
if (is_pfn_swap_entry(entry))
offset = swp_offset_pfn(entry);
else
offset = swp_offset(entry);
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
(offset << MAX_SWAPFILES_SHIFT);
}
flags |= PM_SWAP;
migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
@ -1477,7 +1487,11 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
unsigned long offset;
if (pm->show_pfn) {
offset = swp_offset(entry) +
if (is_pfn_swap_entry(entry))
offset = swp_offset_pfn(entry);
else
offset = swp_offset(entry);
offset = offset +
((addr & ~PMD_MASK) >> PAGE_SHIFT);
frame = swp_type(entry) |
(offset << MAX_SWAPFILES_SHIFT);

View File

@ -2999,8 +2999,8 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* PageAnonExclusive() has to protect against concurrent GUP:
* * Ordinary GUP: Using the PT lock
* * GUP-fast and fork(): mm->write_protect_seq
* * GUP-fast and KSM or temporary unmapping (swap, migration):
* clear/invalidate+flush of the page table entry
* * GUP-fast and KSM or temporary unmapping (swap, migration): see
* page_try_share_anon_rmap()
*
* Must be called with the (sub)page that's actually referenced via the
* page table entry, which might not necessarily be the head page for a
@ -3021,6 +3021,11 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
*/
if (!PageAnon(page))
return false;
/* Paired with a memory barrier in page_try_share_anon_rmap(). */
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
smp_rmb();
/*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.

View File

@ -270,7 +270,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound,
* @page: the exclusive anonymous page to try marking possibly shared
*
* The caller needs to hold the PT lock and has to have the page table entry
* cleared/invalidated+flushed, to properly sync against GUP-fast.
* cleared/invalidated.
*
* This is similar to page_try_dup_anon_rmap(), however, not used during fork()
* to duplicate a mapping, but instead to prepare for KSM or temporarily
@ -286,12 +286,68 @@ static inline int page_try_share_anon_rmap(struct page *page)
{
VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page);
/* See page_try_dup_anon_rmap(). */
if (likely(!is_device_private_page(page) &&
unlikely(page_maybe_dma_pinned(page))))
return -EBUSY;
/* device private pages cannot get pinned via GUP. */
if (unlikely(is_device_private_page(page))) {
ClearPageAnonExclusive(page);
return 0;
}
/*
* We have to make sure that when we clear PageAnonExclusive, that
* the page is not pinned and that concurrent GUP-fast won't succeed in
* concurrently pinning the page.
*
* Conceptually, PageAnonExclusive clearing consists of:
* (A1) Clear PTE
* (A2) Check if the page is pinned; back off if so.
* (A3) Clear PageAnonExclusive
* (A4) Restore PTE (optional, but certainly not writable)
*
* When clearing PageAnonExclusive, we cannot possibly map the page
* writable again, because anon pages that may be shared must never
* be writable. So in any case, if the PTE was writable it cannot
* be writable anymore afterwards and there would be a PTE change. Only
* if the PTE wasn't writable, there might not be a PTE change.
*
* Conceptually, GUP-fast pinning of an anon page consists of:
* (B1) Read the PTE
* (B2) FOLL_WRITE: check if the PTE is not writable; back off if so.
* (B3) Pin the mapped page
* (B4) Check if the PTE changed by re-reading it; back off if so.
* (B5) If the original PTE is not writable, check if
* PageAnonExclusive is not set; back off if so.
*
* If the PTE was writable, we only have to make sure that GUP-fast
* observes a PTE change and properly backs off.
*
* If the PTE was not writable, we have to make sure that GUP-fast either
* detects a (temporary) PTE change or that PageAnonExclusive is cleared
* and properly backs off.
*
* Consequently, when clearing PageAnonExclusive(), we have to make
* sure that (A1), (A2)/(A3) and (A4) happen in the right memory
* order. In GUP-fast pinning code, we have to make sure that (B3),(B4)
* and (B5) happen in the right memory order.
*
* We assume that there might not be a memory barrier after
* clearing/invalidating the PTE (A1) and before restoring the PTE (A4),
* so we use explicit ones here.
*/
/* Paired with the memory barrier in try_grab_folio(). */
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
smp_mb();
if (unlikely(page_maybe_dma_pinned(page)))
return -EBUSY;
ClearPageAnonExclusive(page);
/*
* This is conceptually a smp_wmb() paired with the smp_rmb() in
* gup_must_unshare().
*/
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
smp_mb__after_atomic();
return 0;
}

View File

@ -8,6 +8,11 @@
*/
extern struct swap_info_struct *swap_info[];
extern unsigned long generic_max_swapfile_size(void);
extern unsigned long max_swapfile_size(void);
unsigned long arch_max_swapfile_size(void);
/* Maximum swapfile size supported for the arch (not inclusive). */
extern unsigned long swapfile_maximum_size;
/* Whether swap migration entry supports storing A/D bits for the arch */
extern bool swap_migration_ad_supported;
#endif /* _LINUX_SWAPFILE_H */

View File

@ -8,6 +8,10 @@
#ifdef CONFIG_MMU
#ifdef CONFIG_SWAP
#include <linux/swapfile.h>
#endif /* CONFIG_SWAP */
/*
* swapcache pages are stored in the swapper_space radix tree. We want to
* get good packing density in that tree, so the index should be dense in
@ -23,6 +27,45 @@
#define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
#define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1)
/*
* Definitions only for PFN swap entries (see is_pfn_swap_entry()). To
* store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries
* can use the extra bits to store other information besides PFN.
*/
#ifdef MAX_PHYSMEM_BITS
#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#else /* MAX_PHYSMEM_BITS */
#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT)
#endif /* MAX_PHYSMEM_BITS */
#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1)
/**
* Migration swap entry specific bitfield definitions. Layout:
*
* |----------+--------------------|
* | swp_type | swp_offset |
* |----------+--------+-+-+-------|
* | | resv |D|A| PFN |
* |----------+--------+-+-+-------|
*
* @SWP_MIG_YOUNG_BIT: Whether the page used to have young bit set (bit A)
* @SWP_MIG_DIRTY_BIT: Whether the page used to have dirty bit set (bit D)
*
* Note: A/D bits will be stored in migration entries iff there're enough
* free bits in arch specific swp offset. By default we'll ignore A/D bits
* when migrating a page. Please refer to migration_entry_supports_ad()
* for more information. If there're more bits besides PFN and A/D bits,
* they should be reserved and always be zeros.
*/
#define SWP_MIG_YOUNG_BIT (SWP_PFN_BITS)
#define SWP_MIG_DIRTY_BIT (SWP_PFN_BITS + 1)
#define SWP_MIG_TOTAL_BITS (SWP_PFN_BITS + 2)
#define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT)
#define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT)
static inline bool is_pfn_swap_entry(swp_entry_t entry);
/* Clear all flags but only keep swp_entry_t related information */
static inline pte_t pte_swp_clear_flags(pte_t pte)
{
@ -64,6 +107,17 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
return entry.val & SWP_OFFSET_MASK;
}
/*
* This should only be called upon a pfn swap entry to get the PFN stored
* in the swap entry. Please refers to is_pfn_swap_entry() for definition
* of pfn swap entry.
*/
static inline unsigned long swp_offset_pfn(swp_entry_t entry)
{
VM_BUG_ON(!is_pfn_swap_entry(entry));
return swp_offset(entry) & SWP_PFN_MASK;
}
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
@ -240,6 +294,52 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
return swp_entry(SWP_MIGRATION_WRITE, offset);
}
/*
* Returns whether the host has large enough swap offset field to support
* carrying over pgtable A/D bits for page migrations. The result is
* pretty much arch specific.
*/
static inline bool migration_entry_supports_ad(void)
{
#ifdef CONFIG_SWAP
return swap_migration_ad_supported;
#else /* CONFIG_SWAP */
return false;
#endif /* CONFIG_SWAP */
}
static inline swp_entry_t make_migration_entry_young(swp_entry_t entry)
{
if (migration_entry_supports_ad())
return swp_entry(swp_type(entry),
swp_offset(entry) | SWP_MIG_YOUNG);
return entry;
}
static inline bool is_migration_entry_young(swp_entry_t entry)
{
if (migration_entry_supports_ad())
return swp_offset(entry) & SWP_MIG_YOUNG;
/* Keep the old behavior of aging page after migration */
return false;
}
static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry)
{
if (migration_entry_supports_ad())
return swp_entry(swp_type(entry),
swp_offset(entry) | SWP_MIG_DIRTY);
return entry;
}
static inline bool is_migration_entry_dirty(swp_entry_t entry)
{
if (migration_entry_supports_ad())
return swp_offset(entry) & SWP_MIG_DIRTY;
/* Keep the old behavior of clean page after migration */
return false;
}
extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl);
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
@ -247,8 +347,8 @@ extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
#ifdef CONFIG_HUGETLB_PAGE
extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl);
extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
#endif
#else
#endif /* CONFIG_HUGETLB_PAGE */
#else /* CONFIG_MIGRATION */
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
return swp_entry(0, 0);
@ -276,7 +376,7 @@ static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
#ifdef CONFIG_HUGETLB_PAGE
static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { }
static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { }
#endif
#endif /* CONFIG_HUGETLB_PAGE */
static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
@ -286,7 +386,26 @@ static inline int is_readable_migration_entry(swp_entry_t entry)
return 0;
}
#endif
static inline swp_entry_t make_migration_entry_young(swp_entry_t entry)
{
return entry;
}
static inline bool is_migration_entry_young(swp_entry_t entry)
{
return false;
}
static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry)
{
return entry;
}
static inline bool is_migration_entry_dirty(swp_entry_t entry)
{
return false;
}
#endif /* CONFIG_MIGRATION */
typedef unsigned long pte_marker;
@ -369,7 +488,7 @@ static inline int pte_none_mostly(pte_t pte)
static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
{
struct page *p = pfn_to_page(swp_offset(entry));
struct page *p = pfn_to_page(swp_offset_pfn(entry));
/*
* Any use of migration entries may only occur while the
@ -387,6 +506,9 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
/* Make sure the swp offset can always store the needed fields */
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
return is_migration_entry(entry) || is_device_private_entry(entry) ||
is_device_exclusive_entry(entry);
}
@ -426,7 +548,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
{
return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
}
#else
#else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
struct page *page)
{
@ -455,7 +577,7 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
{
return 0;
}
#endif
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
#ifdef CONFIG_MEMORY_FAILURE
@ -475,27 +597,17 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
}
static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
{
return swp_offset(entry);
}
static inline void num_poisoned_pages_inc(void)
{
atomic_long_inc(&num_poisoned_pages);
}
static inline void num_poisoned_pages_dec(void)
{
atomic_long_dec(&num_poisoned_pages);
}
static inline void num_poisoned_pages_sub(long i)
{
atomic_long_sub(i, &num_poisoned_pages);
}
#else
#else /* CONFIG_MEMORY_FAILURE */
static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
@ -514,7 +626,7 @@ static inline void num_poisoned_pages_inc(void)
static inline void num_poisoned_pages_sub(long i)
{
}
#endif
#endif /* CONFIG_MEMORY_FAILURE */
static inline int non_swap_entry(swp_entry_t entry)
{

View File

@ -158,6 +158,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
else
folio_ref_add(folio,
refs * (GUP_PIN_COUNTING_BIAS - 1));
/*
* Adjust the pincount before re-checking the PTE for changes.
* This is essentially a smp_mb() and is paired with a memory
* barrier in page_try_share_anon_rmap().
*/
smp_mb__after_atomic();
node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
return folio;

View File

@ -253,7 +253,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
cpu_flags |= HMM_PFN_WRITE;
*hmm_pfn = swp_offset(entry) | cpu_flags;
*hmm_pfn = swp_offset_pfn(entry) | cpu_flags;
return 0;
}

View File

@ -2037,7 +2037,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pgtable_t pgtable;
pmd_t old_pmd, _pmd;
bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false;
bool anon_exclusive = false;
bool anon_exclusive = false, dirty = false;
unsigned long addr;
int i;
@ -2121,13 +2121,16 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
write = is_writable_migration_entry(entry);
if (PageAnon(page))
anon_exclusive = is_readable_exclusive_migration_entry(entry);
young = false;
young = is_migration_entry_young(entry);
dirty = is_migration_entry_dirty(entry);
soft_dirty = pmd_swp_soft_dirty(old_pmd);
uffd_wp = pmd_swp_uffd_wp(old_pmd);
} else {
page = pmd_page(old_pmd);
if (pmd_dirty(old_pmd))
if (pmd_dirty(old_pmd)) {
dirty = true;
SetPageDirty(page);
}
write = pmd_write(old_pmd);
young = pmd_young(old_pmd);
soft_dirty = pmd_soft_dirty(old_pmd);
@ -2148,6 +2151,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
*
* In case we cannot clear PageAnonExclusive(), split the PMD
* only and let try_to_migrate_one() fail later.
*
* See page_try_share_anon_rmap(): invalidate PMD first.
*/
anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
@ -2179,6 +2184,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
else
swp_entry = make_readable_migration_entry(
page_to_pfn(page + i));
if (young)
swp_entry = make_migration_entry_young(swp_entry);
if (dirty)
swp_entry = make_migration_entry_dirty(swp_entry);
entry = swp_entry_to_pte(swp_entry);
if (soft_dirty)
entry = pte_swp_mksoft_dirty(entry);
@ -2193,6 +2202,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
entry = pte_wrprotect(entry);
if (!young)
entry = pte_mkold(entry);
/* NOTE: this may set soft-dirty too on some archs */
if (dirty)
entry = pte_mkdirty(entry);
if (soft_dirty)
entry = pte_mksoft_dirty(entry);
if (uffd_wp)
@ -3179,6 +3191,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
/* See page_try_share_anon_rmap(): invalidate PMD first. */
anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
if (anon_exclusive && page_try_share_anon_rmap(page)) {
set_pmd_at(mm, address, pvmw->pmd, pmdval);
@ -3193,6 +3206,10 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
entry = make_readable_exclusive_migration_entry(page_to_pfn(page));
else
entry = make_readable_migration_entry(page_to_pfn(page));
if (pmd_young(pmdval))
entry = make_migration_entry_young(entry);
if (pmd_dirty(pmdval))
entry = make_migration_entry_dirty(entry);
pmdswp = swp_entry_to_pmd(entry);
if (pmd_soft_dirty(pmdval))
pmdswp = pmd_swp_mksoft_dirty(pmdswp);
@ -3218,13 +3235,18 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
entry = pmd_to_swp_entry(*pvmw->pmd);
get_page(new);
pmde = pmd_mkold(mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot)));
pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot));
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
if (is_writable_migration_entry(entry))
pmde = maybe_pmd_mkwrite(pmde, vma);
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
if (!is_migration_entry_young(entry))
pmde = pmd_mkold(pmde);
/* NOTE: this may contain setting soft-dirty on some archs */
if (PageDirty(new) && is_migration_entry_dirty(entry))
pmde = pmd_mkdirty(pmde);
if (PageAnon(new)) {
rmap_t rmap_flags = RMAP_COMPOUND;

View File

@ -2438,6 +2438,8 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
last_fail = result;
goto out_nolock;
}
hend = vma->vm_end & HPAGE_PMD_MASK;
}
mmap_assert_locked(mm);
memset(cc->node_load, 0, sizeof(cc->node_load));

View File

@ -604,9 +604,8 @@ static int __save_stack_trace(unsigned long *trace)
* memory block and add it to the object_list and object_tree_root (or
* object_phys_tree_root).
*/
static struct kmemleak_object *__create_object(unsigned long ptr, size_t size,
int min_count, gfp_t gfp,
bool is_phys)
static void __create_object(unsigned long ptr, size_t size,
int min_count, gfp_t gfp, bool is_phys)
{
unsigned long flags;
struct kmemleak_object *object, *parent;
@ -618,7 +617,7 @@ static struct kmemleak_object *__create_object(unsigned long ptr, size_t size,
if (!object) {
pr_warn("Cannot allocate a kmemleak_object structure\n");
kmemleak_disable();
return NULL;
return;
}
INIT_LIST_HEAD(&object->object_list);
@ -687,7 +686,6 @@ static struct kmemleak_object *__create_object(unsigned long ptr, size_t size,
*/
dump_object_info(parent);
kmem_cache_free(object_cache, object);
object = NULL;
goto out;
}
}
@ -698,21 +696,20 @@ static struct kmemleak_object *__create_object(unsigned long ptr, size_t size,
list_add_tail_rcu(&object->object_list, &object_list);
out:
raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
return object;
}
/* Create kmemleak object which allocated with virtual address. */
static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
int min_count, gfp_t gfp)
static void create_object(unsigned long ptr, size_t size,
int min_count, gfp_t gfp)
{
return __create_object(ptr, size, min_count, gfp, false);
__create_object(ptr, size, min_count, gfp, false);
}
/* Create kmemleak object which allocated with physical address. */
static struct kmemleak_object *create_object_phys(unsigned long ptr, size_t size,
int min_count, gfp_t gfp)
static void create_object_phys(unsigned long ptr, size_t size,
int min_count, gfp_t gfp)
{
return __create_object(ptr, size, min_count, gfp, true);
__create_object(ptr, size, min_count, gfp, true);
}
/*

View File

@ -1095,6 +1095,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
goto out_unlock;
}
/* See page_try_share_anon_rmap(): clear PTE first. */
if (anon_exclusive && page_try_share_anon_rmap(page)) {
set_pte_at(mm, pvmw.address, pvmw.pte, entry);
goto out_unlock;

View File

@ -277,7 +277,7 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
* to SIG_IGN, but hopefully no one will do that?
*/
ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
addr_lsb, t); /* synchronous? */
addr_lsb, t);
if (ret < 0)
pr_info("Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
@ -521,11 +521,11 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
anon_vma_interval_tree_foreach(vmac, &av->rb_root,
pgoff, pgoff) {
vma = vmac->vma;
if (vma->vm_mm != t->mm)
continue;
if (!page_mapped_in_vma(page, vma))
continue;
if (vma->vm_mm == t->mm)
add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma,
to_kill);
add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
@ -635,7 +635,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
swp_entry_t swp = pte_to_swp_entry(pte);
if (is_hwpoison_entry(swp))
pfn = hwpoison_entry_to_pfn(swp);
pfn = swp_offset_pfn(swp);
}
if (!pfn || pfn != poisoned_pfn)
@ -1249,9 +1249,9 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
return ret;
/*
* This check prevents from calling get_hwpoison_unless_zero()
* for any unsupported type of page in order to reduce the risk of
* unexpected races caused by taking a page refcount.
* This check prevents from calling get_page_unless_zero() for any
* unsupported type of page in order to reduce the risk of unexpected
* races caused by taking a page refcount.
*/
if (!HWPoisonHandlable(head, flags))
return -EBUSY;
@ -1409,7 +1409,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* Here we are interested only in user-mapped pages, so skip any
* other types of pages.
*/
if (PageReserved(p) || PageSlab(p))
if (PageReserved(p) || PageSlab(p) || PageTable(p))
return true;
if (!(PageLRU(hpage) || PageHuge(p)))
return true;
@ -2028,7 +2028,7 @@ int memory_failure(unsigned long pfn, int flags)
/*
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
* prep_new_page() will be the gate keeper.
* check_new_page() will be the gate keeper.
* 2) it's part of a non-compound high order page.
* Implies some kernel user: cannot stop them from
* R/W the page; let's pray that the page has been
@ -2131,7 +2131,7 @@ int memory_failure(unsigned long pfn, int flags)
page_flags = p->flags;
if (hwpoison_filter(p)) {
TestClearPageHWPoison(p);
ClearPageHWPoison(p);
unlock_page(p);
put_page(p);
res = -EOPNOTSUPP;
@ -2407,24 +2407,26 @@ EXPORT_SYMBOL(unpoison_memory);
static bool isolate_page(struct page *page, struct list_head *pagelist)
{
bool isolated = false;
bool lru = PageLRU(page);
if (PageHuge(page)) {
isolated = !isolate_hugetlb(page, pagelist);
} else {
bool lru = !__PageMovable(page);
if (lru)
isolated = !isolate_lru_page(page);
else
isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE);
isolated = !isolate_movable_page(page,
ISOLATE_UNEVICTABLE);
if (isolated)
if (isolated) {
list_add(&page->lru, pagelist);
if (lru)
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_lru(page));
}
}
if (isolated && lru)
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_lru(page));
/*
* If we succeed to isolate the page, we grabbed another refcount on
* the page, so we can safely drop the one we got from get_any_pages().
@ -2600,7 +2602,7 @@ int soft_offline_page(unsigned long pfn, int flags)
void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
int i;
int i, total = 0;
/*
* A further optimization is to have per section refcounted
@ -2613,8 +2615,10 @@ void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
for (i = 0; i < nr_pages; i++) {
if (PageHWPoison(&memmap[i])) {
num_poisoned_pages_dec();
total++;
ClearPageHWPoison(&memmap[i]);
}
}
if (total)
num_poisoned_pages_sub(total);
}

View File

@ -454,7 +454,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
/* fall back to slow path lookup */
rcu_read_lock();
pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
if (pgmap && !percpu_ref_tryget_live_rcu(&pgmap->ref))
pgmap = NULL;
rcu_read_unlock();

View File

@ -198,7 +198,7 @@ static bool remove_migration_pte(struct folio *folio,
#endif
folio_get(folio);
pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
if (pte_swp_soft_dirty(*pvmw.pte))
pte = pte_mksoft_dirty(pte);
@ -206,6 +206,10 @@ static bool remove_migration_pte(struct folio *folio,
* Recheck VMA as permissions can change since migration started
*/
entry = pte_to_swp_entry(*pvmw.pte);
if (!is_migration_entry_young(entry))
pte = pte_mkold(pte);
if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
pte = pte_mkdirty(pte);
if (is_writable_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);
else if (pte_swp_uffd_wp(*pvmw.pte))

View File

@ -233,6 +233,12 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
else
entry = make_readable_migration_entry(
page_to_pfn(page));
if (pte_present(pte)) {
if (pte_young(pte))
entry = make_migration_entry_young(entry);
if (pte_dirty(pte))
entry = make_migration_entry_dirty(entry);
}
swp_pte = swp_entry_to_pte(entry);
if (pte_present(pte)) {
if (pte_soft_dirty(pte))

View File

@ -3600,16 +3600,11 @@ EXPORT_SYMBOL_GPL(split_page);
int __isolate_free_page(struct page *page, unsigned int order)
{
unsigned long watermark;
struct zone *zone;
int mt;
BUG_ON(!PageBuddy(page));
zone = page_zone(page);
mt = get_pageblock_migratetype(page);
struct zone *zone = page_zone(page);
int mt = get_pageblock_migratetype(page);
if (!is_migrate_isolate(mt)) {
unsigned long watermark;
/*
* Obey watermarks as if the page was being allocated. We can
* emulate a high-order watermark check with a raised order-0
@ -3623,8 +3618,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
__mod_zone_freepage_state(zone, -(1UL << order), mt);
}
/* Remove page from free list */
del_page_from_free_list(page, zone, order);
/*
@ -3645,7 +3638,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
}
}
return 1UL << order;
}

View File

@ -86,7 +86,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
!is_device_exclusive_entry(entry))
return false;
pfn = swp_offset(entry);
pfn = swp_offset_pfn(entry);
} else if (is_swap_pte(*pvmw->pte)) {
swp_entry_t entry;
@ -96,7 +96,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
!is_device_exclusive_entry(entry))
return false;
pfn = swp_offset(entry);
pfn = swp_offset_pfn(entry);
} else {
if (!pte_present(*pvmw->pte))
return false;
@ -221,7 +221,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
return not_found(pvmw);
entry = pmd_to_swp_entry(pmde);
if (!is_migration_entry(entry) ||
!check_pmd(swp_offset(entry), pvmw))
!check_pmd(swp_offset_pfn(entry), pvmw))
return not_found(pvmw);
return true;
}

View File

@ -1577,11 +1577,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
} else {
flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
/*
* Nuke the page table entry. When having to clear
* PageAnonExclusive(), we always have to flush.
*/
if (should_defer_flush(mm, flags) && !anon_exclusive) {
/* Nuke the page table entry. */
if (should_defer_flush(mm, flags)) {
/*
* We clear the PTE but do not flush so potentially
* a remote CPU could still be writing to the folio.
@ -1712,6 +1709,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
page_vma_mapped_walk_done(&pvmw);
break;
}
/* See page_try_share_anon_rmap(): clear PTE first. */
if (anon_exclusive &&
page_try_share_anon_rmap(subpage)) {
swap_free(entry);
@ -2043,6 +2042,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
}
VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) &&
!anon_exclusive, subpage);
/* See page_try_share_anon_rmap(): clear PTE first. */
if (anon_exclusive &&
page_try_share_anon_rmap(subpage)) {
if (folio_test_hugetlb(folio))
@ -2068,7 +2069,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
else
entry = make_readable_migration_entry(
page_to_pfn(subpage));
if (pte_young(pteval))
entry = make_migration_entry_young(entry);
if (pte_dirty(pteval))
entry = make_migration_entry_dirty(entry);
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);

View File

@ -63,6 +63,10 @@ EXPORT_SYMBOL_GPL(nr_swap_pages);
/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
long total_swap_pages;
static int least_priority = -1;
unsigned long swapfile_maximum_size;
#ifdef CONFIG_MIGRATION
bool swap_migration_ad_supported;
#endif /* CONFIG_MIGRATION */
static const char Bad_file[] = "Bad swap file entry ";
static const char Unused_file[] = "Unused swap file entry ";
@ -2816,7 +2820,7 @@ unsigned long generic_max_swapfile_size(void)
}
/* Can be overridden by an architecture for additional checks. */
__weak unsigned long max_swapfile_size(void)
__weak unsigned long arch_max_swapfile_size(void)
{
return generic_max_swapfile_size();
}
@ -2856,7 +2860,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
p->cluster_next = 1;
p->cluster_nr = 0;
maxpages = max_swapfile_size();
maxpages = swapfile_maximum_size;
last_page = swap_header->info.last_page;
if (!last_page) {
pr_warn("Empty swap-file\n");
@ -3677,6 +3681,13 @@ static int __init swapfile_init(void)
for_each_node(nid)
plist_head_init(&swap_avail_heads[nid]);
swapfile_maximum_size = arch_max_swapfile_size();
#ifdef CONFIG_MIGRATION
if (swapfile_maximum_size >= (1UL << SWP_MIG_TOTAL_BITS))
swap_migration_ad_supported = true;
#endif /* CONFIG_MIGRATION */
return 0;
}
subsys_initcall(swapfile_init);

View File

@ -30,7 +30,6 @@ map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
memfd_secret
local_config.*
soft-dirty
split_huge_page_test
ksm_tests