ANDROID: mm: Fix SPF-aware fast-mremap

Fast-mremap tries to detect concurrent SPFs by checking whether
vma->vm_ref_count is 1 and setting it to -1, to prevent concurrent
SPFs from running after a fast-mremap has started. This check
doesn't account for cases where the VMA has been split or merged
after the check has happened in the SPF or fast-mremap paths, which
can lead to the PMD/PUD-level page table being changed out from under
a concurrent SPF if certain race conditions are met.
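
For context, the exclusive-ownership check being removed is a cmpxchg on
the VMA reference count (condensed from the hunk removed in the diff
below):

	/* Only succeeds if we hold the sole reference; blocks get_vma() for SPFs. */
	static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
	{
		return atomic_cmpxchg_acquire(&vma->vm_ref_count, 1, -1) == 1;
	}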

Currently SPF takes the PTL lock at the lowest level of the
page table hierarchy (i.e. the page table page covering a 2MB extent).

Fast-mremap at the PMD level only takes the PTL locks for the
page table pages (old and new) containing the PMD entries to be
updated (i.e. the page table pages covering 1GB extents).

Remove the vma_ref_count trylock, and disable fast-mremap at the PUD
level if SPF is enabled.
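
With SPF enabled, the PUD-level path is now compiled out entirely (from
the diff below):

	#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) && \
		!defined(CONFIG_SPECULATIVE_PAGE_FAULT)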

At the PMD level, take the pte ptl lock on the source address to avoid
races with a concurrent SPF (see the condensed sequence after the NOTE
below).

NOTE: The additional lock means that there are now 2 levels of
      nested spinlocks. However, we forgo adding a new lockdep
      subclass as Android doesn't enable lockdep in production.

    old_ptl (pmd_lock)
        new_ptl (pmd_lock)
            old_pte_ptl (pte ptl lock)
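
Condensed from the diff below (the pmd_lock()/pmd_lockptr() calls are
from the surrounding move_normal_pmd() context, not shown in the hunks),
the resulting sequence is roughly:

	old_ptl = pmd_lock(mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	old_pte_ptl = pte_lockptr(mm, old_pmd);
	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
		spin_lock(old_pte_ptl);

	/* ... move the pmd entry and flush the TLB ... */

	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
		spin_unlock(old_pte_ptl);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);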

Bug: 377672115
Change-Id: Ie634806115ce86a05477dfe45806487c856c3759
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
@@ -210,44 +210,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
-#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
-{
-	/*
-	 * If we have the only reference, swap the refcount to -1. This
-	 * will prevent other concurrent references by get_vma() for SPFs.
-	 */
-	return atomic_cmpxchg_acquire(&vma->vm_ref_count, 1, -1) == 1;
-}
-/*
- * Restore the VMA reference count to 1 after a fast mremap.
- */
-static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
-{
-	int old = atomic_xchg_release(&vma->vm_ref_count, 1);
-	/*
-	 * This should only be called after a corresponding,
-	 * successful trylock_vma_ref_count().
-	 */
-	VM_BUG_ON_VMA(old != -1, vma);
-}
-#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
-static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
-{
-	return true;
-}
-static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
-{
-}
-#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
 #ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
-	spinlock_t *old_ptl, *new_ptl;
+	spinlock_t *old_ptl, *new_ptl, *old_pte_ptl;
 	struct mm_struct *mm = vma->vm_mm;
 	pmd_t pmd;
@@ -277,14 +244,6 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
 		return false;
-	/*
-	 * We hold both exclusive mmap_lock and rmap_lock at this point and
-	 * cannot block. If we cannot immediately take exclusive ownership
-	 * of the VMA fallback to the move_ptes().
-	 */
-	if (!trylock_vma_ref_count(vma))
-		return false;
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -294,6 +253,24 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	/*
+	 * If SPF is enabled, take the ptl lock on the source page table
+	 * page, to prevent the entire pmd from being moved under a
+	 * concurrent SPF.
+	 *
+	 * There is no need to take the destination ptl lock since, mremap
+	 * has already created a hole at the destination and freed the
+	 * corresponding page tables in the process.
+	 *
+	 * NOTE: If USE_SPLIT_PTE_PTLOCKS is false, then the old_ptl, new_ptl,
+	 * and the old_pte_ptl; are all the same lock (mm->page_table_lock).
+	 * Check that the locks are different to avoid a deadlock.
+	 */
+	old_pte_ptl = pte_lockptr(mm, old_pmd);
+	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
+		spin_lock(old_pte_ptl);
 	/* Clear the pmd */
 	pmd = *old_pmd;
 	pmd_clear(old_pmd);
@@ -303,11 +280,13 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	/* Set the new pmd */
 	set_pmd_at(mm, new_addr, new_pmd, pmd);
 	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
+		spin_unlock(old_pte_ptl);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
-	unlock_vma_ref_count(vma);
 	return true;
 }
 #else
@@ -319,7 +298,8 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
 }
 #endif
-#ifdef CONFIG_HAVE_MOVE_PUD
+#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) && \
+	!defined(CONFIG_SPECULATIVE_PAGE_FAULT)
 static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
 {
@@ -334,14 +314,6 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pud_none(*new_pud)))
 		return false;
-	/*
-	 * We hold both exclusive mmap_lock and rmap_lock at this point and
-	 * cannot block. If we cannot immediately take exclusive ownership
-	 * of the VMA fallback to the move_ptes().
-	 */
-	if (!trylock_vma_ref_count(vma))
-		return false;
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -364,7 +336,6 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
-	unlock_vma_ref_count(vma);
 	return true;
 }
 #else