mm: extract code to fault in a page from __get_user_pages()
The nesting level in __get_user_pages() is excessive. Reduce it by moving the code that faults in a page into a new helper, faultin_page().
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
69e68b4f03
commit
1674448345
138
mm/gup.c
138
mm/gup.c
@ -214,12 +214,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
|
|||||||
return follow_page_pte(vma, address, pmd, flags);
|
return follow_page_pte(vma, address, pmd, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
|
|
||||||
{
|
|
||||||
return stack_guard_page_start(vma, addr) ||
|
|
||||||
stack_guard_page_end(vma, addr+PAGE_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int get_gate_page(struct mm_struct *mm, unsigned long address,
|
static int get_gate_page(struct mm_struct *mm, unsigned long address,
|
||||||
unsigned int gup_flags, struct vm_area_struct **vma,
|
unsigned int gup_flags, struct vm_area_struct **vma,
|
||||||
struct page **page)
|
struct page **page)
|
||||||
@ -264,6 +258,63 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
|
||||||
|
unsigned long address, unsigned int *flags, int *nonblocking)
|
||||||
|
{
|
||||||
|
struct mm_struct *mm = vma->vm_mm;
|
||||||
|
unsigned int fault_flags = 0;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* For mlock, just skip the stack guard page. */
|
||||||
|
if ((*flags & FOLL_MLOCK) &&
|
||||||
|
(stack_guard_page_start(vma, address) ||
|
||||||
|
stack_guard_page_end(vma, address + PAGE_SIZE)))
|
||||||
|
return -ENOENT;
|
||||||
|
if (*flags & FOLL_WRITE)
|
||||||
|
fault_flags |= FAULT_FLAG_WRITE;
|
||||||
|
if (nonblocking)
|
||||||
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
||||||
|
if (*flags & FOLL_NOWAIT)
|
||||||
|
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
|
||||||
|
|
||||||
|
ret = handle_mm_fault(mm, vma, address, fault_flags);
|
||||||
|
if (ret & VM_FAULT_ERROR) {
|
||||||
|
if (ret & VM_FAULT_OOM)
|
||||||
|
return -ENOMEM;
|
||||||
|
if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
|
||||||
|
return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
|
||||||
|
if (ret & VM_FAULT_SIGBUS)
|
||||||
|
return -EFAULT;
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tsk) {
|
||||||
|
if (ret & VM_FAULT_MAJOR)
|
||||||
|
tsk->maj_flt++;
|
||||||
|
else
|
||||||
|
tsk->min_flt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret & VM_FAULT_RETRY) {
|
||||||
|
if (nonblocking)
|
||||||
|
*nonblocking = 0;
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
|
||||||
|
* necessary, even if maybe_mkwrite decided not to set pte_write. We
|
||||||
|
* can thus safely do subsequent page lookups as if they were reads.
|
||||||
|
* But only do so when looping for pte_write is futile: in some cases
|
||||||
|
* userspace may also be wanting to write to the gotten user page,
|
||||||
|
* which a read fault here might prevent (a readonly page might get
|
||||||
|
* reCOWed by userspace write).
|
||||||
|
*/
|
||||||
|
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
|
||||||
|
*flags &= ~FOLL_WRITE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* __get_user_pages() - pin user pages in memory
|
* __get_user_pages() - pin user pages in memory
|
||||||
* @tsk: task_struct of target task
|
* @tsk: task_struct of target task
|
||||||
@ -410,69 +461,22 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||||||
while (!(page = follow_page_mask(vma, start,
|
while (!(page = follow_page_mask(vma, start,
|
||||||
foll_flags, &page_mask))) {
|
foll_flags, &page_mask))) {
|
||||||
int ret;
|
int ret;
|
||||||
unsigned int fault_flags = 0;
|
ret = faultin_page(tsk, vma, start, &foll_flags,
|
||||||
|
nonblocking);
|
||||||
/* For mlock, just skip the stack guard page. */
|
switch (ret) {
|
||||||
if (foll_flags & FOLL_MLOCK) {
|
case 0:
|
||||||
if (stack_guard_page(vma, start))
|
break;
|
||||||
goto next_page;
|
case -EFAULT:
|
||||||
}
|
case -ENOMEM:
|
||||||
if (foll_flags & FOLL_WRITE)
|
case -EHWPOISON:
|
||||||
fault_flags |= FAULT_FLAG_WRITE;
|
return i ? i : ret;
|
||||||
if (nonblocking)
|
case -EBUSY:
|
||||||
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
|
return i;
|
||||||
if (foll_flags & FOLL_NOWAIT)
|
case -ENOENT:
|
||||||
fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);
|
goto next_page;
|
||||||
|
default:
|
||||||
ret = handle_mm_fault(mm, vma, start,
|
|
||||||
fault_flags);
|
|
||||||
|
|
||||||
if (ret & VM_FAULT_ERROR) {
|
|
||||||
if (ret & VM_FAULT_OOM)
|
|
||||||
return i ? i : -ENOMEM;
|
|
||||||
if (ret & (VM_FAULT_HWPOISON |
|
|
||||||
VM_FAULT_HWPOISON_LARGE)) {
|
|
||||||
if (i)
|
|
||||||
return i;
|
|
||||||
else if (gup_flags & FOLL_HWPOISON)
|
|
||||||
return -EHWPOISON;
|
|
||||||
else
|
|
||||||
return -EFAULT;
|
|
||||||
}
|
|
||||||
if (ret & VM_FAULT_SIGBUS)
|
|
||||||
goto efault;
|
|
||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tsk) {
|
|
||||||
if (ret & VM_FAULT_MAJOR)
|
|
||||||
tsk->maj_flt++;
|
|
||||||
else
|
|
||||||
tsk->min_flt++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ret & VM_FAULT_RETRY) {
|
|
||||||
if (nonblocking)
|
|
||||||
*nonblocking = 0;
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The VM_FAULT_WRITE bit tells us that
|
|
||||||
* do_wp_page has broken COW when necessary,
|
|
||||||
* even if maybe_mkwrite decided not to set
|
|
||||||
* pte_write. We can thus safely do subsequent
|
|
||||||
* page lookups as if they were reads. But only
|
|
||||||
* do so when looping for pte_write is futile:
|
|
||||||
* in some cases userspace may also be wanting
|
|
||||||
* to write to the gotten user page, which a
|
|
||||||
* read fault here might prevent (a readonly
|
|
||||||
* page might get reCOWed by userspace write).
|
|
||||||
*/
|
|
||||||
if ((ret & VM_FAULT_WRITE) &&
|
|
||||||
!(vma->vm_flags & VM_WRITE))
|
|
||||||
foll_flags &= ~FOLL_WRITE;
|
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
if (IS_ERR(page))
|
if (IS_ERR(page))
|
||||||
|
Loading…
Reference in New Issue
Block a user