ANDROID: mm: Fix SPF-aware fast-mremap

Fast-mremap tries to detect concurrent SPFs by checking whether
vma->vm_ref_count is 1 and setting it to -1, to prevent concurrent
SPFs from running after a fast-mremap has started. This check
doesn't account for cases where the VMA has been split or merged
after the check has happened in the SPF or fast-mremap paths, which
can lead to the PMD/PUD-level page table being changed out from under
a concurrent SPF if certain race conditions are met.
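
For context, the exclusive-ownership check being removed is a cmpxchg on
the VMA reference count (condensed from the hunk removed in the diff
below):

	/* Only succeeds if we hold the sole reference; blocks get_vma() for SPFs. */
	static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
	{
		return atomic_cmpxchg_acquire(&vma->vm_ref_count, 1, -1) == 1;
	}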

Currently SPF takes the PTL lock at the lowest level of the
page table hierarchy (i.e. the page table page covering a 2MB extent).

Fast-mremap at the PMD level only takes the PTL locks for the
page table pages (old and new) containing the PMD entries to be
updated (i.e. the page table pages covering 1GB extents).

Remove the vma_ref_count trylock, and disable fast-mremap at the PUD
level if SPF is enabled.
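
With SPF enabled, the PUD-level path is now compiled out entirely (from
the diff below):

	#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) && \
		!defined(CONFIG_SPECULATIVE_PAGE_FAULT)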

At the PMD level, take the pte ptl lock on the source address to avoid
races with a concurrent SPF (see the condensed sequence after the NOTE
below).

NOTE: The additional lock means that there are now 2 levels of
      nested spinlocks. However, we forgo adding a new lockdep
      subclass as Android doesn't enable lockdep in production.

    old_ptl (pmd_lock)
        new_ptl (pmd_lock)
            old_pte_ptl (pte ptl lock)
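
Condensed from the diff below (the pmd_lock()/pmd_lockptr() calls are
from the surrounding move_normal_pmd() context, not shown in the hunks),
the resulting sequence is roughly:

	old_ptl = pmd_lock(mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	old_pte_ptl = pte_lockptr(mm, old_pmd);
	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
		spin_lock(old_pte_ptl);

	/* ... move the pmd entry and flush the TLB ... */

	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
		spin_unlock(old_pte_ptl);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);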

Bug: 377672115
Change-Id: Ie634806115ce86a05477dfe45806487c856c3759
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
@@ -210,44 +210,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
-#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
-{
-	/*
-	 * If we have the only reference, swap the refcount to -1. This
-	 * will prevent other concurrent references by get_vma() for SPFs.
-	 */
-	return atomic_cmpxchg_acquire(&vma->vm_ref_count, 1, -1) == 1;
-}
-/*
- * Restore the VMA reference count to 1 after a fast mremap.
- */
-static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
-{
-	int old = atomic_xchg_release(&vma->vm_ref_count, 1);
-	/*
-	 * This should only be called after a corresponding,
-	 * successful trylock_vma_ref_count().
-	 */
-	VM_BUG_ON_VMA(old != -1, vma);
-}
-#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
-static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
-{
-	return true;
-}
-static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
-{
-}
-#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
 #ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
-	spinlock_t *old_ptl, *new_ptl;
+	spinlock_t *old_ptl, *new_ptl, *old_pte_ptl;
 	struct mm_struct *mm = vma->vm_mm;
 	pmd_t pmd;
@@ -277,14 +244,6 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
 		return false;
-	/*
-	 * We hold both exclusive mmap_lock and rmap_lock at this point and
-	 * cannot block. If we cannot immediately take exclusive ownership
-	 * of the VMA fallback to the move_ptes().
-	 */
-	if (!trylock_vma_ref_count(vma))
-		return false;
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -294,6 +253,24 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	/*
+	 * If SPF is enabled, take the ptl lock on the source page table
+	 * page, to prevent the entire pmd from being moved under a
+	 * concurrent SPF.
+	 *
+	 * There is no need to take the destination ptl lock since, mremap
+	 * has already created a hole at the destination and freed the
+	 * corresponding page tables in the process.
+	 *
+	 * NOTE: If USE_SPLIT_PTE_PTLOCKS is false, then the old_ptl, new_ptl,
+	 * and the old_pte_ptl; are all the same lock (mm->page_table_lock).
+	 * Check that the locks are different to avoid a deadlock.
+	 */
+	old_pte_ptl = pte_lockptr(mm, old_pmd);
+	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
+		spin_lock(old_pte_ptl);
 	/* Clear the pmd */
 	pmd = *old_pmd;
 	pmd_clear(old_pmd);
@@ -303,11 +280,13 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	/* Set the new pmd */
 	set_pmd_at(mm, new_addr, new_pmd, pmd);
 	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+	if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
+		spin_unlock(old_pte_ptl);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
-	unlock_vma_ref_count(vma);
 	return true;
 }
 #else
@@ -319,7 +298,8 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
 }
 #endif
-#ifdef CONFIG_HAVE_MOVE_PUD
+#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD) && \
+	!defined(CONFIG_SPECULATIVE_PAGE_FAULT)
 static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
 {
@@ -334,14 +314,6 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pud_none(*new_pud)))
 		return false;
-	/*
-	 * We hold both exclusive mmap_lock and rmap_lock at this point and
-	 * cannot block. If we cannot immediately take exclusive ownership
-	 * of the VMA fallback to the move_ptes().
-	 */
-	if (!trylock_vma_ref_count(vma))
-		return false;
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -364,7 +336,6 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
-	unlock_vma_ref_count(vma);
 	return true;
 }
 #else