ANDROID: mm: allow vmas with vm_ops to be speculatively handled

Right now only anonymous page faults are handled speculatively, which
leaves a large share of faults still having to take mmap_sem. File-backed
faults were left out because their fault handlers, mostly in the fs layer,
may use the vma in ways the speculative path cannot know about. This patch
enables speculative fault handling for ext4, f2fs and shmem. The feature
is disabled by default and is enabled via the allow_file_spec_access
kernel parameter.

Bug: 171954515
Change-Id: I0d23ebf299000e4ac5e2c71bc0b7fc9006e98da9
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
Authored by Vinayak Menon on 2021-03-18 15:20:17 +05:30; committed by Todd Kjos
parent 77e791866b
commit 35eacb5c87
10 changed files with 170 additions and 37 deletions

Documentation/admin-guide/kernel-parameters.txt

@@ -289,6 +289,12 @@
do not want to use tracing_snapshot_alloc() as it needs
to be done where GFP_KERNEL allocations are allowed.
allow_file_spec_access
Allow speculative faults on file-backed pages.
Speculative faults are enabled only for vm_ops that
implement the allow_speculation callback and return
true from it.
allow_mismatched_32bit_el0 [ARM64]
Allow execve() of 32-bit applications and setting of the
PER_LINUX32 personality on systems where only a strict
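
For reference (editorial sketch, not part of the patch): the gate that the
allow_file_spec_access entry above documents is, in condensed form, the
file-backed half of the vmf_allows_speculation() helper added to
mm/memory.c further down:

	/* Condensed paraphrase; see the real vmf_allows_speculation() below. */
	static bool file_vma_allows_speculation(struct vm_fault *vmf)
	{
		struct vm_area_struct *vma = vmf->vma;

		if (!allow_file_spec_access)	/* boot parameter not passed */
			return false;

		/* Private file mapping, first write, no anon_vma set up yet. */
		if (!(vma->vm_flags & VM_SHARED) &&
		    (vmf->flags & FAULT_FLAG_WRITE) && !vma->anon_vma)
			return false;

		/* The vma's vm_ops must explicitly opt in. */
		return vma->vm_ops->allow_speculation &&
		       vma->vm_ops->allow_speculation();
	}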

fs/ext4/file.c

@@ -745,6 +745,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
.fault = ext4_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
.allow_speculation = filemap_allow_speculation,
#endif
};
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)

fs/f2fs/file.c

@@ -171,6 +171,9 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = f2fs_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite,
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
.allow_speculation = filemap_allow_speculation,
#endif
};
static int get_parent_ino(struct inode *inode, nid_t *pino)

include/linux/mm.h

@@ -647,6 +647,10 @@ struct vm_operations_struct {
struct page *(*find_special_page)(struct vm_area_struct *vma,
unsigned long addr);
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
bool (*allow_speculation)(void);
#endif
ANDROID_KABI_RESERVE(1);
ANDROID_KABI_RESERVE(2);
ANDROID_KABI_RESERVE(3);
@@ -2775,6 +2779,9 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
extern bool filemap_allow_speculation(void);
#endif
/* mm/page-writeback.c */
int __must_check write_one_page(struct page *page);
@@ -3339,6 +3346,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
#endif
extern int sysctl_nr_trim_pages;
extern bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr);
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */

include/linux/vm_event_item.h

@@ -126,7 +126,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
SWAP_RA_HIT,
#endif
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
SPECULATIVE_PGFAULT,
SPECULATIVE_PGFAULT_ANON,
SPECULATIVE_PGFAULT_FILE,
#endif
NR_VM_EVENT_ITEMS
};

mm/filemap.c

@@ -2864,6 +2864,11 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
}
if (pmd_none(*vmf->pmd)) {
if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
unlock_page(page);
put_page(page);
return true;
}
vmf->ptl = pmd_lock(mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(mm);
@@ -2942,6 +2947,14 @@ static inline struct page *next_map_page(struct address_space *mapping,
mapping, xas, end_pgoff);
}
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
bool filemap_allow_speculation(void)
{
return true;
}
EXPORT_SYMBOL_GPL(filemap_allow_speculation);
#endif
vm_fault_t filemap_map_pages(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff)
{
@@ -2961,12 +2974,22 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
goto out;
if (filemap_map_pmd(vmf, head)) {
if (pmd_none(*vmf->pmd) &&
vmf->flags & FAULT_FLAG_SPECULATIVE) {
ret = VM_FAULT_RETRY;
goto out;
}
ret = VM_FAULT_NOPAGE;
goto out;
}
addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
if (!pte_map_lock_addr(vmf, addr)) {
ret = VM_FAULT_RETRY;
goto out;
}
do {
page = find_subpage(head, xas.xa_index);
if (PageHWPoison(page))
@@ -3033,6 +3056,9 @@ const struct vm_operations_struct generic_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = filemap_page_mkwrite,
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
.allow_speculation = filemap_allow_speculation,
#endif
};
/* This is used for a general mmap of a disk file */

mm/memory.c

@@ -2638,7 +2638,7 @@ static bool pte_spinlock(struct vm_fault *vmf)
return ret;
}
static bool pte_map_lock(struct vm_fault *vmf)
static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
{
bool ret = false;
pte_t *pte;
@@ -2647,12 +2647,6 @@ static bool pte_map_lock(struct vm_fault *vmf)
pmd_t pmdval;
#endif
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
return true;
}
/*
* The first vma_has_changed() guarantees the page-tables are still
* valid, having IRQs disabled ensures they stay around, hence the
@@ -2662,7 +2656,7 @@ static bool pte_map_lock(struct vm_fault *vmf)
*/
local_irq_disable();
if (vma_has_changed(vmf)) {
trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);
goto out;
}
@@ -2673,7 +2667,7 @@ static bool pte_map_lock(struct vm_fault *vmf)
*/
pmdval = READ_ONCE(*vmf->pmd);
if (!pmd_same(pmdval, vmf->orig_pmd)) {
trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
trace_spf_pmd_changed(_RET_IP_, vmf->vma, addr);
goto out;
}
#endif
@@ -2686,16 +2680,16 @@ static bool pte_map_lock(struct vm_fault *vmf)
* Since we are in a speculative path, accept it could fail
*/
ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
pte = pte_offset_map(vmf->pmd, vmf->address);
pte = pte_offset_map(vmf->pmd, addr);
if (unlikely(!spin_trylock(ptl))) {
pte_unmap(pte);
trace_spf_pte_lock(_RET_IP_, vmf->vma, vmf->address);
trace_spf_pte_lock(_RET_IP_, vmf->vma, addr);
goto out;
}
if (vma_has_changed(vmf)) {
pte_unmap_unlock(pte, ptl);
trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);
goto out;
}
@@ -2706,6 +2700,82 @@ static bool pte_map_lock(struct vm_fault *vmf)
local_irq_enable();
return ret;
}
static bool pte_map_lock(struct vm_fault *vmf)
{
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
return true;
}
return __pte_map_lock_speculative(vmf, vmf->address);
}
bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr)
{
if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
addr, &vmf->ptl);
return true;
}
return __pte_map_lock_speculative(vmf, addr);
}
static bool __read_mostly allow_file_spec_access;
static int __init allow_file_spec_access_setup(char *str)
{
allow_file_spec_access = true;
return 1;
}
__setup("allow_file_spec_access", allow_file_spec_access_setup);
static bool vmf_allows_speculation(struct vm_fault *vmf)
{
if (vma_is_anonymous(vmf->vma)) {
/*
* __anon_vma_prepare() requires the mmap_sem to be held
* because vm_next and vm_prev must be safe. This can't be
* guaranteed in the speculative path.
*/
if (!vmf->vma->anon_vma) {
trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
return false;
}
return true;
}
if (!allow_file_spec_access) {
/*
* Can't call vm_ops services as we don't know what they
* would do with the VMA.
* This includes huge pages from hugetlbfs.
*/
trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
return false;
}
if (!(vmf->vma->vm_flags & VM_SHARED) &&
(vmf->flags & FAULT_FLAG_WRITE) &&
!vmf->vma->anon_vma) {
/*
* non-anonymous private COW without anon_vma.
* See above.
*/
trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
return false;
}
if (vmf->vma->vm_ops->allow_speculation &&
vmf->vma->vm_ops->allow_speculation()) {
return true;
}
trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
return false;
}
#else
static inline bool pte_spinlock(struct vm_fault *vmf)
{
@@ -2720,6 +2790,18 @@ static inline bool pte_map_lock(struct vm_fault *vmf)
vmf->address, &vmf->ptl);
return true;
}
inline bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr)
{
vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
addr, &vmf->ptl);
return true;
}
static inline bool vmf_allows_speculation(struct vm_fault *vmf)
{
return false;
}
#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
/*
@@ -4496,6 +4578,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
{
pte_t entry;
vm_fault_t ret = 0;
if (unlikely(pmd_none(*vmf->pmd))) {
/*
@@ -4559,7 +4642,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
if (!vmf->pte) {
if (vma_is_anonymous(vmf->vma))
return do_anonymous_page(vmf);
else if (vmf->flags & FAULT_FLAG_SPECULATIVE)
else if ((vmf->flags & FAULT_FLAG_SPECULATIVE) &&
!vmf_allows_speculation(vmf))
return VM_FAULT_RETRY;
else
return do_fault(vmf);
@@ -4591,6 +4675,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
/* Skip spurious TLB flush for retried page fault */
if (vmf->flags & FAULT_FLAG_TRIED)
goto unlock;
if (vmf->flags & FAULT_FLAG_SPECULATIVE)
ret = VM_FAULT_RETRY;
/*
* This is needed only for protection faults but the arch code
* is not yet telling us if this is a protection fault or not.
@@ -4602,7 +4688,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
}
unlock:
pte_unmap_unlock(vmf->pte, vmf->ptl);
return 0;
return ret;
}
/*
@@ -4794,6 +4880,7 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
.pgoff = linear_page_index(vma, address),
.vma = vma,
.gfp_mask = __get_fault_gfp_mask(vma),
.flags = flags,
};
#ifdef CONFIG_NUMA
struct mempolicy *pol;
@@ -4815,25 +4902,8 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
return VM_FAULT_RETRY;
}
/*
* Can't call vm_ops service has we don't know what they would do
* with the VMA.
* This include huge page from hugetlbfs.
*/
if (vmf.vma->vm_ops) {
trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
if (!vmf_allows_speculation(&vmf))
return VM_FAULT_RETRY;
}
/*
* __anon_vma_prepare() requires the mmap_sem to be held
* because vm_next and vm_prev must be safe. This can't be guaranteed
* in the speculative path.
*/
if (unlikely(!vmf.vma->anon_vma)) {
trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
return VM_FAULT_RETRY;
}
vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags);
vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot);
@@ -4964,8 +5034,12 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
ret = handle_pte_fault(&vmf);
mem_cgroup_exit_user_fault();
if (ret != VM_FAULT_RETRY)
count_vm_event(SPECULATIVE_PGFAULT);
if (ret != VM_FAULT_RETRY) {
if (vma_is_anonymous(vmf.vma))
count_vm_event(SPECULATIVE_PGFAULT_ANON);
else
count_vm_event(SPECULATIVE_PGFAULT_FILE);
}
/*
* The task may have entered a memcg OOM situation but

mm/nommu.c

@@ -1676,6 +1676,14 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
}
EXPORT_SYMBOL(filemap_map_pages);
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
bool filemap_allow_speculation(void)
{
BUG();
return false;
}
#endif
int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags)
{

mm/shmem.c

@@ -3955,6 +3955,9 @@ static const struct vm_operations_struct shmem_vm_ops = {
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
#endif
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
.allow_speculation = filemap_allow_speculation,
#endif
};
int shmem_init_fs_context(struct fs_context *fc)

mm/vmstat.c

@@ -1353,7 +1353,8 @@ const char * const vmstat_text[] = {
"swap_ra_hit",
#endif
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
"speculative_pgfault"
"speculative_pgfault",
"speculative_pgfault_file"
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};