thp: allocate memory in khugepaged outside of mmap_sem write mode
This tries to be more friendly to filesystems in userland, i.e. userland backends that allocate memory in their I/O paths and that could deadlock if khugepaged held the backend's mmap_sem in write mode while allocating memory: the allocation may wait for writeback I/O completion from the daemon, while the daemon may itself be blocked on the mmap_sem in read mode if a page fault happens and it wasn't using mlock for the memory required for I/O submission and completion.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0bbbc0b33d
commit ce83d2174e
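The deadlock described above, and the reordering that avoids it, can be sketched in plain userspace C with a pthread rwlock standing in for mmap_sem. This is only an illustrative analogy, not kernel code; collapse_old() and collapse_new() are hypothetical names:

#include <pthread.h>
#include <stdlib.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Old ordering: the huge page was allocated with mmap_sem already held
 * in write mode. If the allocator had to wait on I/O served by a
 * userland daemon, and that daemon was itself blocked faulting pages
 * in (mmap_sem read mode), neither side could make progress.
 */
static void *collapse_old(size_t size)
{
	void *page;

	pthread_rwlock_wrlock(&mmap_sem);
	page = malloc(size);	/* may wait on the blocked daemon */
	/* ... collapse work ... */
	pthread_rwlock_unlock(&mmap_sem);
	return page;
}

/*
 * New ordering: allocate under the read side first, so the daemon can
 * still fault its I/O buffers in, then take the write lock only for
 * the work that really needs exclusion.
 */
static void *collapse_new(size_t size)
{
	void *page;

	pthread_rwlock_rdlock(&mmap_sem);
	page = malloc(size);	/* readers are not excluded here */
	pthread_rwlock_unlock(&mmap_sem);
	if (!page)
		return NULL;

	pthread_rwlock_wrlock(&mmap_sem);
	/* ... revalidate, then do the collapse work ... */
	pthread_rwlock_unlock(&mmap_sem);
	return page;
}

int main(void)
{
	free(collapse_old(4096));
	free(collapse_new(4096));
	return 0;
}

Note that the lock is not upgraded atomically: the read side is dropped before the write side is taken, which is why the kernel code in the hunks below has to revalidate the vma and the pmd after reacquiring mmap_sem.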
@@ -1664,9 +1664,9 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
-			       struct page **hpage)
+			       struct page **hpage,
+			       struct vm_area_struct *vma)
 {
-	struct vm_area_struct *vma;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd, _pmd;
@@ -1680,9 +1680,34 @@ static void collapse_huge_page(struct mm_struct *mm,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 #ifndef CONFIG_NUMA
 	VM_BUG_ON(!*hpage);
+	new_page = *hpage;
 #else
 	VM_BUG_ON(*hpage);
+	/*
+	 * Allocate the page while the vma is still valid and under
+	 * the mmap_sem read mode so there is no memory allocation
+	 * later when we take the mmap_sem in write mode. This is more
+	 * friendly behavior (OTOH it may actually hide bugs) to
+	 * filesystems in userland with daemons allocating memory in
+	 * the userland I/O paths. Allocating memory with the
+	 * mmap_sem in read mode is good idea also to allow greater
+	 * scalability.
+	 */
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	if (unlikely(!new_page)) {
+		up_read(&mm->mmap_sem);
+		*hpage = ERR_PTR(-ENOMEM);
+		return;
+	}
 #endif
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+		up_read(&mm->mmap_sem);
+		put_page(new_page);
+		return;
+	}
+
+	/* after allocating the hugepage upgrade to mmap_sem write mode */
+	up_read(&mm->mmap_sem);
+
 	/*
 	 * Prevent all access to pagetables with the exception of
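To make the error-path discipline of the hunk above explicit: every failure before the read-to-write handover must drop the read side exactly once, and the function never returns with the lock held. A minimal standalone sketch, again with pthreads standing in for mmap_sem; hypothetical_charge() is an invented stand-in for mem_cgroup_newpage_charge():

#include <pthread.h>
#include <stdlib.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

/* Invented stand-in for mem_cgroup_newpage_charge(). */
static int hypothetical_charge(void *page)
{
	(void)page;
	return 0;	/* pretend the charge always succeeds */
}

/*
 * Called with mmap_sem held for read; returns with it released in
 * every path, mirroring the contract the patch gives
 * collapse_huge_page().
 */
static void *collapse_sketch(void)
{
	void *new_page;

	new_page = malloc(2 * 1024 * 1024);	/* ~ alloc_hugepage_vma() */
	if (!new_page) {
		pthread_rwlock_unlock(&mmap_sem);	/* error: drop read side */
		return NULL;	/* ~ *hpage = ERR_PTR(-ENOMEM) */
	}
	if (hypothetical_charge(new_page)) {
		pthread_rwlock_unlock(&mmap_sem);	/* error: drop read side */
		free(new_page);	/* ~ put_page(new_page) */
		return NULL;
	}

	/* after allocating and charging, upgrade to write mode */
	pthread_rwlock_unlock(&mmap_sem);
	pthread_rwlock_wrlock(&mmap_sem);
	/* ... the collapse itself would happen here ... */
	pthread_rwlock_unlock(&mmap_sem);
	return new_page;
}

int main(void)
{
	pthread_rwlock_rdlock(&mmap_sem);
	free(collapse_sketch());	/* lock released inside */
	return 0;
}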
@@ -1720,18 +1745,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-#ifndef CONFIG_NUMA
-	new_page = *hpage;
-#else
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
-	if (unlikely(!new_page)) {
-		*hpage = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-#endif
-	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)))
-		goto out_put_page;
-
 	anon_vma_lock(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
@@ -1759,7 +1772,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
 		mem_cgroup_uncharge_page(new_page);
-		goto out_put_page;
+		goto out;
 	}
 
 	/*
@@ -1798,15 +1811,15 @@ static void collapse_huge_page(struct mm_struct *mm,
 	*hpage = NULL;
 #endif
 	khugepaged_pages_collapsed++;
-out:
+out_up_write:
 	up_write(&mm->mmap_sem);
 	return;
 
-out_put_page:
+out:
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
-	goto out;
+	goto out_up_write;
 }
 
 static int khugepaged_scan_pmd(struct mm_struct *mm,
@@ -1865,10 +1878,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		ret = 1;
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret) {
-		up_read(&mm->mmap_sem);
-		collapse_huge_page(mm, address, hpage);
-	}
+	if (ret)
+		/* collapse_huge_page will return with the mmap_sem released */
+		collapse_huge_page(mm, address, hpage, vma);
 out:
 	return ret;
 }
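The new calling convention, in which collapse_huge_page() consumes the read lock taken by its caller, is an ownership transfer that sparse can document via the kernel's __releases() annotation from <linux/compiler.h>. The patch itself adds only the comment above, not the annotation; the standalone sketch below is merely illustrative:

#include <pthread.h>

/*
 * No-op stand-in for the kernel's sparse annotation: under sparse,
 * __releases() declares that the function exits with the lock dropped,
 * so a caller unlocking again would be flagged.
 */
#define __releases(x)

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;

static void collapse_consumes_lock(void) __releases(mmap_sem);

/*
 * Consumes the read lock taken by the caller: whether it bails out
 * early or upgrades and finishes, it returns with mmap_sem released,
 * which is why the up_read() was removed from khugepaged_scan_pmd.
 */
static void collapse_consumes_lock(void)
{
	/* ... allocate, upgrade, collapse or fail ... */
	pthread_rwlock_unlock(&mmap_sem);
}

int main(void)
{
	pthread_rwlock_rdlock(&mmap_sem);
	collapse_consumes_lock();	/* returns with mmap_sem released */
	return 0;
}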