ANDROID: mm: allow vmas with vm_ops to be speculatively handled
Right now, only anonymous page faults are handled speculatively, which leaves a large share of faults still needing mmap_sem. File-backed faults were excluded because fault handlers, mainly in the fs layer, may use the vma in unknown ways. This patch adds an allow_speculation vm_ops callback through which a mapping can declare its fault path safe for speculation, and wires it up for ext4, f2fs and shmem. The feature is disabled by default and is enabled via the allow_file_spec_access kernel parameter.

Bug: 171954515
Change-Id: I0d23ebf299000e4ac5e2c71bc0b7fc9006e98da9
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
commit 35eacb5c87
parent 77e791866b
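Before the diff itself: the opt-in pattern this commit applies to ext4, f2fs and shmem is small enough to show in one sketch. The following is a hypothetical out-of-tree filesystem (myfs, not part of this commit) whose fault path is just the generic page cache; it assumes CONFIG_SPECULATIVE_PAGE_FAULT and the filemap_allow_speculation() helper the commit adds to mm/filemap.c.

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical example, not part of this commit: a filesystem that
 * faults through the generic page cache can opt in to speculative
 * handling by wiring the filemap_allow_speculation() helper that
 * this commit introduces.
 */
static const struct vm_operations_struct myfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
	.allow_speculation = filemap_allow_speculation,
#endif
};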
Documentation/admin-guide/kernel-parameters.txt
@@ -289,6 +289,12 @@
 			do not want to use tracing_snapshot_alloc() as it needs
 			to be done where GFP_KERNEL allocations are allowed.
 
+	allow_file_spec_access
+			Allow speculative faults on file backed pages.
+			Speculative faults are enabled only for those vm_ops
+			that implement and return true for allow_speculation
+			callback.
+
 	allow_mismatched_32bit_el0 [ARM64]
 			Allow execve() of 32-bit applications and setting of the
 			PER_LINUX32 personality on systems where only a strict
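For reference, the parameter is a bare flag: appending allow_file_spec_access to the kernel command line (bootloader setup assumed; the surrounding arguments below are arbitrary) enables the feature.

    console=ttyS0 root=/dev/sda1 allow_file_spec_access

The __setup() handler added to mm/memory.c further down sets the flag unconditionally and ignores any value, so the only way to keep the feature off is to omit the parameter.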
fs/ext4/file.c
@@ -745,6 +745,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
 	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= ext4_page_mkwrite,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
fs/f2fs/file.c
@@ -171,6 +171,9 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
 	.fault		= f2fs_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= f2fs_vm_page_mkwrite,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 static int get_parent_ino(struct inode *inode, nid_t *pino)
include/linux/mm.h
@@ -647,6 +647,10 @@ struct vm_operations_struct {
 	struct page *(*find_special_page)(struct vm_area_struct *vma,
 					  unsigned long addr);
+
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	bool (*allow_speculation)(void);
+#endif
 
 	ANDROID_KABI_RESERVE(1);
 	ANDROID_KABI_RESERVE(2);
 	ANDROID_KABI_RESERVE(3);
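The new hook is deliberately optional: vmf_allows_speculation() in mm/memory.c further down only speculates when the pointer is non-NULL and the call returns true, so every existing vm_operations_struct keeps its current behaviour. A driver could also decline explicitly; the sketch below is hypothetical and not part of this commit.

#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
/* Hypothetical driver callback, not part of this commit. Leaving
 * .allow_speculation NULL has the same effect today; an explicit
 * callback only matters once the answer needs to be computed.
 */
static bool mydrv_allow_speculation(void)
{
	return false;
}
#endif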
@@ -2775,6 +2779,9 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
 extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+extern bool filemap_allow_speculation(void);
+#endif
 
 /* mm/page-writeback.c */
 int __must_check write_one_page(struct page *page);
@@ -3339,6 +3346,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
 #endif
 
 extern int sysctl_nr_trim_pages;
+extern bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
include/linux/vm_event_item.h
@@ -126,7 +126,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		SWAP_RA_HIT,
 #endif
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-		SPECULATIVE_PGFAULT,
+		SPECULATIVE_PGFAULT_ANON,
+		SPECULATIVE_PGFAULT_FILE,
 #endif
 		NR_VM_EVENT_ITEMS
 };
mm/filemap.c (28 changes)
@@ -2864,6 +2864,11 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
 	}
 
 	if (pmd_none(*vmf->pmd)) {
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			unlock_page(page);
+			put_page(page);
+			return true;
+		}
 		vmf->ptl = pmd_lock(mm, vmf->pmd);
 		if (likely(pmd_none(*vmf->pmd))) {
 			mm_inc_nr_ptes(mm);
@@ -2942,6 +2947,14 @@ static inline struct page *next_map_page(struct address_space *mapping,
 				  mapping, xas, end_pgoff);
 }
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+bool filemap_allow_speculation(void)
+{
+	return true;
+}
+EXPORT_SYMBOL_GPL(filemap_allow_speculation);
+#endif
+
 vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 			     pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
@@ -2961,12 +2974,22 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		goto out;
 
 	if (filemap_map_pmd(vmf, head)) {
+		if (pmd_none(*vmf->pmd) &&
+		    vmf->flags & FAULT_FLAG_SPECULATIVE) {
+			ret = VM_FAULT_RETRY;
+			goto out;
+		}
+
 		ret = VM_FAULT_NOPAGE;
 		goto out;
 	}
 
 	addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
+	if (!pte_map_lock_addr(vmf, addr)) {
+		ret = VM_FAULT_RETRY;
+		goto out;
+	}
+
 	do {
 		page = find_subpage(head, xas.xa_index);
 		if (PageHWPoison(page))
@@ -3033,6 +3056,9 @@ const struct vm_operations_struct generic_file_vm_ops = {
 	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite	= filemap_page_mkwrite,
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 /* This is used for a general mmap of a disk file */
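Note that filemap_allow_speculation() unconditionally returns true: the generic page-cache fault path is the one this commit treats as safe without mmap_sem, and with generic_file_vm_ops opting in as well, every user of generic file mmap inherits speculative handling once the boot flag is set.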
mm/memory.c (142 changes)
@@ -2638,7 +2638,7 @@ static bool pte_spinlock(struct vm_fault *vmf)
 	return ret;
 }
 
-static bool pte_map_lock(struct vm_fault *vmf)
+static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 {
 	bool ret = false;
 	pte_t *pte;
@@ -2647,12 +2647,6 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	pmd_t pmdval;
 #endif
 
-	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
-		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
-					       vmf->address, &vmf->ptl);
-		return true;
-	}
-
 	/*
 	 * The first vma_has_changed() guarantees the page-tables are still
 	 * valid, having IRQs disabled ensures they stay around, hence the
@@ -2662,7 +2656,7 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	 */
 	local_irq_disable();
 	if (vma_has_changed(vmf)) {
-		trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 
@@ -2673,7 +2667,7 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	 */
 	pmdval = READ_ONCE(*vmf->pmd);
 	if (!pmd_same(pmdval, vmf->orig_pmd)) {
-		trace_spf_pmd_changed(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_pmd_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 #endif
@@ -2686,16 +2680,16 @@ static bool pte_map_lock(struct vm_fault *vmf)
 	 * Since we are in a speculative patch, accept it could fail
 	 */
 	ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
-	pte = pte_offset_map(vmf->pmd, vmf->address);
+	pte = pte_offset_map(vmf->pmd, addr);
 	if (unlikely(!spin_trylock(ptl))) {
 		pte_unmap(pte);
-		trace_spf_pte_lock(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_pte_lock(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 
 	if (vma_has_changed(vmf)) {
 		pte_unmap_unlock(pte, ptl);
-		trace_spf_vma_changed(_RET_IP_, vmf->vma, vmf->address);
+		trace_spf_vma_changed(_RET_IP_, vmf->vma, addr);
 		goto out;
 	}
 
@@ -2706,6 +2700,82 @@ static bool __pte_map_lock_speculative(struct vm_fault *vmf, unsigned long addr)
 	local_irq_enable();
 	return ret;
 }
 
+static bool pte_map_lock(struct vm_fault *vmf)
+{
+	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+					       vmf->address, &vmf->ptl);
+		return true;
+	}
+
+	return __pte_map_lock_speculative(vmf, vmf->address);
+}
+
+bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr)
+{
+	if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+		vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+					       addr, &vmf->ptl);
+		return true;
+	}
+
+	return __pte_map_lock_speculative(vmf, addr);
+}
+
+static bool __read_mostly allow_file_spec_access;
+static int __init allow_file_spec_access_setup(char *str)
+{
+	allow_file_spec_access = true;
+	return 1;
+}
+__setup("allow_file_spec_access", allow_file_spec_access_setup);
+
+static bool vmf_allows_speculation(struct vm_fault *vmf)
+{
+	if (vma_is_anonymous(vmf->vma)) {
+		/*
+		 * __anon_vma_prepare() requires the mmap_sem to be held
+		 * because vm_next and vm_prev must be safe. This can't be
+		 * guaranteed in the speculative path.
+		 */
+		if (!vmf->vma->anon_vma) {
+			trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+			return false;
+		}
+		return true;
+	}
+
+	if (!allow_file_spec_access) {
+		/*
+		 * Can't call vm_ops service has we don't know what they would
+		 * do with the VMA.
+		 * This include huge page from hugetlbfs.
+		 */
+		trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+		return false;
+	}
+
+	if (!(vmf->vma->vm_flags & VM_SHARED) &&
+	    (vmf->flags & FAULT_FLAG_WRITE) &&
+	    !vmf->vma->anon_vma) {
+		/*
+		 * non-anonymous private COW without anon_vma.
+		 * See above.
+		 */
+		trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+		return false;
+	}
+
+	if (vmf->vma->vm_ops->allow_speculation &&
+	    vmf->vma->vm_ops->allow_speculation()) {
+		return true;
+	}
+
+	trace_spf_vma_notsup(_RET_IP_, vmf->vma, vmf->address);
+	return false;
+}
+
 #else
 static inline bool pte_spinlock(struct vm_fault *vmf)
 {
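Taken together, vmf_allows_speculation() gates file-backed speculation on three conditions: the allow_file_spec_access boot flag must be set, a private writable mapping must already have its anon_vma prepared (the COW case), and the vma's vm_ops must provide an allow_speculation callback that returns true. Anonymous vmas keep the old rule of requiring only an existing anon_vma.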
@@ -2720,6 +2790,18 @@ static inline bool pte_map_lock(struct vm_fault *vmf)
 					       vmf->address, &vmf->ptl);
 	return true;
 }
 
+inline bool pte_map_lock_addr(struct vm_fault *vmf, unsigned long addr)
+{
+	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd,
+				       addr, &vmf->ptl);
+	return true;
+}
+
+static inline bool vmf_allows_speculation(struct vm_fault *vmf)
+{
+	return false;
+}
 #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
 
 /*
@@ -4496,6 +4578,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
+	vm_fault_t ret = 0;
 
 	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
@@ -4559,7 +4642,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (!vmf->pte) {
 		if (vma_is_anonymous(vmf->vma))
 			return do_anonymous_page(vmf);
-		else if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+		else if ((vmf->flags & FAULT_FLAG_SPECULATIVE) &&
+			 !vmf_allows_speculation(vmf))
 			return VM_FAULT_RETRY;
 		else
 			return do_fault(vmf);
@@ -4591,6 +4675,8 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 		/* Skip spurious TLB flush for retried page fault */
 		if (vmf->flags & FAULT_FLAG_TRIED)
 			goto unlock;
+		if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+			ret = VM_FAULT_RETRY;
 		/*
 		 * This is needed only for protection faults but the arch code
 		 * is not yet telling us if this is a protection fault or not.
@@ -4602,7 +4688,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	}
 unlock:
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	return 0;
+	return ret;
 }
 
 /*
@@ -4794,6 +4880,7 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 		.pgoff = linear_page_index(vma, address),
 		.vma = vma,
 		.gfp_mask = __get_fault_gfp_mask(vma),
+		.flags = flags,
 	};
 #ifdef CONFIG_NUMA
 	struct mempolicy *pol;
@@ -4815,25 +4902,8 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 		return VM_FAULT_RETRY;
 	}
 
-	/*
-	 * Can't call vm_ops service has we don't know what they would do
-	 * with the VMA.
-	 * This include huge page from hugetlbfs.
-	 */
-	if (vmf.vma->vm_ops) {
-		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
+	if (!vmf_allows_speculation(&vmf))
 		return VM_FAULT_RETRY;
-	}
-
-	/*
-	 * __anon_vma_prepare() requires the mmap_sem to be held
-	 * because vm_next and vm_prev must be safe. This can't be guaranteed
-	 * in the speculative path.
-	 */
-	if (unlikely(!vmf.vma->anon_vma)) {
-		trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
-		return VM_FAULT_RETRY;
-	}
 
 	vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags);
 	vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot);
@@ -4964,8 +5034,12 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
 	ret = handle_pte_fault(&vmf);
 	mem_cgroup_exit_user_fault();
 
-	if (ret != VM_FAULT_RETRY)
-		count_vm_event(SPECULATIVE_PGFAULT);
+	if (ret != VM_FAULT_RETRY) {
+		if (vma_is_anonymous(vmf.vma))
+			count_vm_event(SPECULATIVE_PGFAULT_ANON);
+		else
+			count_vm_event(SPECULATIVE_PGFAULT_FILE);
+	}
 
 	/*
 	 * The task may have entered a memcg OOM situation but
mm/nommu.c
@@ -1676,6 +1676,14 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 }
 EXPORT_SYMBOL(filemap_map_pages);
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+bool filemap_allow_speculation(void)
+{
+	BUG();
+	return false;
+}
+#endif
+
 int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 		       unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
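The mm/nommu.c stub BUG()s rather than answering: without an MMU there is no speculative fault path to take, so the definition presumably exists only to satisfy the declaration in include/linux/mm.h and should never be reached.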
mm/shmem.c
@@ -3955,6 +3955,9 @@ static const struct vm_operations_struct shmem_vm_ops = {
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
 #endif
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	.allow_speculation = filemap_allow_speculation,
+#endif
 };
 
 int shmem_init_fs_context(struct fs_context *fc)
mm/vmstat.c
@@ -1353,7 +1353,8 @@ const char * const vmstat_text[] = {
 	"swap_ra_hit",
 #endif
 #ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-	"speculative_pgfault"
+	"speculative_pgfault",
+	"speculative_pgfault_file"
 #endif
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 };
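After this change the counters surface in /proc/vmstat; note that the anonymous counter keeps the old "speculative_pgfault" name while file-backed speculation gets its own "speculative_pgfault_file" entry, so a command like grep speculative /proc/vmstat lists both on a kernel running with the patch.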