Merge branch 'akpm' (patches from Andrew Morton)

Merge more patches from Andrew Morton:
 "The rest of MM.  Plus one misc cleanup"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
  mm/Kconfig: add MMU dependency for MIGRATION.
  kernel: replace strict_strto*() with kstrto*()
  mm, thp: count thp_fault_fallback anytime thp fault fails
  thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page()
  thp: do_huge_pmd_anonymous_page() cleanup
  thp: move maybe_pmd_mkwrite() out of mk_huge_pmd()
  mm: cleanup add_to_page_cache_locked()
  thp: account anon transparent huge pages into NR_ANON_PAGES
  truncate: drop 'oldsize' truncate_pagecache() parameter
  mm: make lru_add_drain_all() selective
  memcg: document cgroup dirty/writeback memory statistics
  memcg: add per cgroup writeback pages accounting
  memcg: check for proper lock held in mem_cgroup_update_page_stat
  memcg: remove MEMCG_NR_FILE_MAPPED
  memcg: reduce function dereference
  memcg: avoid overflow caused by PAGE_ALIGN
  memcg: rename RESOURCE_MAX to RES_COUNTER_MAX
  memcg: correct RESOURCE_MAX to ULLONG_MAX
  mm: memcg: do not trap chargers with full callstack on OOM
  mm: memcg: rework and document OOM waiting and wakeup
  ...
Linus Torvalds 2013-09-12 15:44:27 -07:00
commit ac4de9543a
79 changed files with 972 additions and 918 deletions

View File

@@ -490,6 +490,8 @@ pgpgin - # of charging events to the memory cgroup. The charging
 pgpgout      - # of uncharging events to the memory cgroup. The uncharging
                event happens each time a page is unaccounted from the cgroup.
 swap         - # of bytes of swap usage
+writeback    - # of bytes of file/anon cache that are queued for syncing to
+               disk.
 inactive_anon - # of bytes of anonymous and swap cache memory on inactive
                LRU list.
 active_anon  - # of bytes of anonymous and swap cache memory on active

View File

@@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
     const struct exception_table_entry *fixup;
     int fault, si_code = SEGV_MAPERR;
     siginfo_t info;
-    unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                  (cause > 0 ? FAULT_FLAG_WRITE : 0));
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     /* As of EV6, a load into $31/$f31 is a prefetch, and never faults
        (or is suppressed by the PALcode).  Support that for older CPUs
@@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
     if (address >= TASK_SIZE)
         goto vmalloc_fault;
 #endif
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
@@ -142,6 +142,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
     } else {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     }
     /* If for any reason at all we couldn't handle the fault,
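
The same two-step conversion recurs in each architecture hunk below: the write bit is no longer folded into the initial flags value, and FAULT_FLAG_USER is set whenever the fault was raised from user mode. As an illustrative composite only (identifiers such as is_write are placeholders, not any single architecture's exact code), the resulting shape of the fault path is:

    /* Sketch of the pattern this series rolls out per architecture. */
    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

    if (user_mode(regs))        /* fault came from userspace */
        flags |= FAULT_FLAG_USER;
    if (is_write)               /* fault was caused by a store */
        flags |= FAULT_FLAG_WRITE;

    fault = handle_mm_fault(mm, vma, address, flags);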

View File

@@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
     siginfo_t info;
     int fault, ret;
     int write = regs->ecr_cause & ECR_C_PROTV_STORE;  /* ST/EX */
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                  (write ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     /*
      * We fault-in kernel-space virtual memory on-demand. The
@@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
@@ -117,12 +118,12 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
     if (write) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
             goto bad_area;
     }
-survive:
     /*
      * If for any reason at all we couldn't handle the fault,
      * make sure we exit gracefully rather than endlessly redo
@@ -201,10 +202,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
     die("Oops", regs, address);
 out_of_memory:
-    if (is_global_init(tsk)) {
-        yield();
-        goto survive;
-    }
     up_read(&mm->mmap_sem);
     if (user_mode(regs)) {

View File

@@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
     struct task_struct *tsk;
     struct mm_struct *mm;
     int fault, sig, code;
-    int write = fsr & FSR_WRITE;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                (write ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     if (notify_page_fault(regs, fsr))
         return 0;
@@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
+    if (fsr & FSR_WRITE)
+        flags |= FAULT_FLAG_WRITE;
     /*
      * As per x86, we may deadlock here. However, since the kernel only
      * validly references user space from well defined areas of the code,
@@ -349,6 +352,13 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
     if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
         return 0;
+    /*
+     * If we are in kernel mode at this point, we
+     * have no context to handle this fault with.
+     */
+    if (!user_mode(regs))
+        goto no_context;
     if (fault & VM_FAULT_OOM) {
         /*
          * We ran out of memory, call the OOM killer, and return to
@@ -359,13 +369,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         return 0;
     }
-    /*
-     * If we are in kernel mode at this point, we
-     * have no context to handle this fault with.
-     */
-    if (!user_mode(regs))
-        goto no_context;
     if (fault & VM_FAULT_SIGBUS) {
         /*
          * We had some memory, but were unable to

View File

@@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
     unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
     unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
-    if (esr & ESR_LNX_EXEC) {
-        vm_flags = VM_EXEC;
-    } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
-        vm_flags = VM_WRITE;
-        mm_flags |= FAULT_FLAG_WRITE;
-    }
     tsk = current;
     mm = tsk->mm;
@@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        mm_flags |= FAULT_FLAG_USER;
+    if (esr & ESR_LNX_EXEC) {
+        vm_flags = VM_EXEC;
+    } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+        vm_flags = VM_WRITE;
+        mm_flags |= FAULT_FLAG_WRITE;
+    }
     /*
      * As per x86, we may deadlock here. However, since the kernel only
      * validly references user space from well defined areas of the code,
@@ -288,6 +291,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                   VM_FAULT_BADACCESS))))
         return 0;
+    /*
+     * If we are in kernel mode at this point, we have no context to
+     * handle this fault with.
+     */
+    if (!user_mode(regs))
+        goto no_context;
     if (fault & VM_FAULT_OOM) {
         /*
          * We ran out of memory, call the OOM killer, and return to
@@ -298,13 +308,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
         return 0;
     }
-    /*
-     * If we are in kernel mode at this point, we have no context to
-     * handle this fault with.
-     */
-    if (!user_mode(regs))
-        goto no_context;
     if (fault & VM_FAULT_SIGBUS) {
         /*
          * We had some memory, but were unable to successfully fix up

View File

@@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
     local_irq_enable();
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
@@ -228,9 +230,9 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
      */
 out_of_memory:
     up_read(&mm->mmap_sem);
-    pagefault_out_of_memory();
     if (!user_mode(regs))
         goto no_context;
+    pagefault_out_of_memory();
     return;
 do_sigbus:

View File

@@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
     struct vm_area_struct * vma;
     siginfo_t info;
     int fault;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     D(printk(KERN_DEBUG
          "Page fault for %lX on %X at %lX, prot %d write %d\n",
@@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
@@ -155,6 +156,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
     } else if (writeaccess == 1) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
             goto bad_area;

View File

@@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
     struct vm_area_struct *vma;
     struct mm_struct *mm;
     unsigned long _pme, lrai, lrad, fixup;
+    unsigned long flags = 0;
     siginfo_t info;
     pgd_t *pge;
     pud_t *pue;
     pte_t *pte;
-    int write;
     int fault;
 #if 0
@@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(__frame))
+        flags |= FAULT_FLAG_USER;
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, ear0);
@@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
      */
 good_area:
     info.si_code = SEGV_ACCERR;
-    write = 0;
     switch (esr0 & ESR0_ATXC) {
     default:
         /* handle write to write protected page */
@@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 #endif
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
-        write = 1;
+        flags |= FAULT_FLAG_WRITE;
         break;
         /* handle read from protected page */
@@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
      * make sure we exit gracefully rather than endlessly redo
      * the fault.
      */
-    fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
+    fault = handle_mm_fault(mm, vma, ear0, flags);
     if (unlikely(fault & VM_FAULT_ERROR)) {
         if (fault & VM_FAULT_OOM)
             goto out_of_memory;

View File

@@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
     int si_code = SEGV_MAPERR;
     int fault;
     const struct exception_table_entry *fixup;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                 (cause > 0 ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     /*
      * If we're in an interrupt or have no user context,
@@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
     local_irq_enable();
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
@@ -96,6 +97,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
     case FLT_STORE:
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
         break;
     }

View File

@@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
     mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
         | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
-    flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
     /* mmap_sem is performance critical.... */
     prefetchw(&mm->mmap_sem);
@@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
     if (notify_page_fault(regs, TRAP_BRKPT))
         return;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
+    if (mask & VM_WRITE)
+        flags |= FAULT_FLAG_WRITE;
 retry:
     down_read(&mm->mmap_sem);

View File

@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
     struct mm_struct *mm;
     struct vm_area_struct * vma;
     unsigned long page, addr;
-    int write;
+    unsigned long flags = 0;
     int fault;
     siginfo_t info;
@@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
     if (in_atomic() || !mm)
         goto bad_area_nosemaphore;
+    if (error_code & ACE_USERMODE)
+        flags |= FAULT_FLAG_USER;
     /* When running in the kernel we expect faults to occur only to
      * addresses in user space. All other faults represent errors in the
      * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
      */
 good_area:
     info.si_code = SEGV_ACCERR;
-    write = 0;
     switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
     default:    /* 3: write, present */
         /* fall through */
     case ACE_WRITE: /* write, not present */
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
-        write++;
+        flags |= FAULT_FLAG_WRITE;
         break;
     case ACE_PROTECTION:    /* read, present */
     case 0:     /* read, not present */
@@ -194,7 +196,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
      */
     addr = (address & PAGE_MASK);
     set_thread_fault_code(error_code);
-    fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
+    fault = handle_mm_fault(mm, vma, addr, flags);
     if (unlikely(fault & VM_FAULT_ERROR)) {
         if (fault & VM_FAULT_OOM)
             goto out_of_memory;

View File

@@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);

View File

@@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
     struct vm_area_struct *vma, *prev_vma;
     siginfo_t info;
     int fault;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                (write_access ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     tsk = current;
@@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
@@ -121,6 +122,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
     if (write_access) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
             goto bad_area;

View File

@@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
     int code = SEGV_MAPERR;
     int is_write = error_code & ESR_S;
     int fault;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                 (is_write ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     regs->ear = address;
     regs->esr = error_code;
@@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
         die("Weird page fault", regs, SIGSEGV);
     }
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
     /* When running in the kernel we expect faults to occur only to
      * addresses in user space. All other faults represent errors in the
      * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -199,6 +201,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
     if (unlikely(is_write)) {
         if (unlikely(!(vma->vm_flags & VM_WRITE)))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     /* a read */
     } else {
         /* protection fault */

View File

@@ -42,8 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
     const int field = sizeof(unsigned long) * 2;
     siginfo_t info;
     int fault;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                 (write ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 #if 0
     printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
@@ -93,6 +92,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
     if (in_atomic() || !mm)
         goto bad_area_nosemaphore;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
@@ -114,6 +115,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
     if (write) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (cpu_has_rixi) {
             if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
@@ -241,6 +243,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
      * (which will retry the fault, or kill us if we got oom-killed).
      */
     up_read(&mm->mmap_sem);
+    if (!user_mode(regs))
+        goto no_context;
     pagefault_out_of_memory();
     return;

View File

@@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
     if (in_atomic() || !mm)
         goto no_context;
+    if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);

View File

@@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
     if (user_mode(regs)) {
         /* Exception was in userspace: reenable interrupts */
         local_irq_enable();
+        flags |= FAULT_FLAG_USER;
     } else {
         /* If exception was in a syscall, then IRQ's may have
          * been enabled or disabled.  If they were enabled,

View File

@@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
+    if (acc_type & VM_WRITE)
+        flags |= FAULT_FLAG_WRITE;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma_prev(mm, address, &prev_vma);
@@ -203,8 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
      * fault.
      */
-    fault = handle_mm_fault(mm, vma, address,
-            flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
+    fault = handle_mm_fault(mm, vma, address, flags);
     if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
         return;

View File

@@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
     is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
-    if (is_write)
-        flags |= FAULT_FLAG_WRITE;
 #ifdef CONFIG_PPC_ICSWX
     /*
      * we need to do this early because this "data storage
@@ -288,6 +285,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
     if (user_mode(regs))
         store_update_sp = store_updates_sp(regs);
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
     /* When running in the kernel we expect faults to occur only to
      * addresses in user space. All other faults represent errors in the
      * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -415,6 +415,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
     } else if (is_write) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     /* a read */
     } else {
         /* protection fault */

View File

@@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
     address = trans_exc_code & __FAIL_ADDR_MASK;
     perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
     flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
     if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
         flags |= FAULT_FLAG_WRITE;
     down_read(&mm->mmap_sem);

View File

@@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
     struct task_struct *tsk = current;
     struct mm_struct *mm = tsk->mm;
     const int field = sizeof(unsigned long) * 2;
+    unsigned long flags = 0;
     siginfo_t info;
     int fault;
@@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
     if (in_atomic() || !mm)
         goto bad_area_nosemaphore;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
     if (!vma)
@@ -95,18 +99,18 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
     if (write) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
             goto bad_area;
     }
-survive:
     /*
      * If for any reason at all we couldn't handle the fault,
      * make sure we exit gracefully rather than endlessly redo
      * the fault.
      */
-    fault = handle_mm_fault(mm, vma, address, write);
+    fault = handle_mm_fault(mm, vma, address, flags);
     if (unlikely(fault & VM_FAULT_ERROR)) {
         if (fault & VM_FAULT_OOM)
             goto out_of_memory;
@@ -167,11 +171,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
      */
 out_of_memory:
     up_read(&mm->mmap_sem);
-    if (is_global_init(tsk)) {
-        yield();
-        down_read(&mm->mmap_sem);
-        goto survive;
-    }
     if (!user_mode(regs))
         goto no_context;
     pagefault_out_of_memory();

View File

@@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
     struct mm_struct *mm;
     struct vm_area_struct * vma;
     int fault;
-    int write = error_code & FAULT_CODE_WRITE;
-    unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                  (write ? FAULT_FLAG_WRITE : 0));
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     tsk = current;
     mm = tsk->mm;
@@ -476,6 +474,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
     set_thread_fault_code(error_code);
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
+    if (error_code & FAULT_CODE_WRITE)
+        flags |= FAULT_FLAG_WRITE;
     /*
      * If for any reason at all we couldn't handle the fault,
      * make sure we exit gracefully rather than endlessly redo

View File

@@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
     unsigned long g2;
     int from_user = !(regs->psr & PSR_PS);
     int fault, code;
-    unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                  (write ? FAULT_FLAG_WRITE : 0));
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     if (text_fault)
         address = regs->pc;
@@ -235,6 +234,11 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
             goto bad_area;
     }
+    if (from_user)
+        flags |= FAULT_FLAG_USER;
+    if (write)
+        flags |= FAULT_FLAG_WRITE;
     /*
      * If for any reason at all we couldn't handle the fault,
      * make sure we exit gracefully rather than endlessly redo
@@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write)
     struct vm_area_struct *vma;
     struct task_struct *tsk = current;
     struct mm_struct *mm = tsk->mm;
+    unsigned int flags = FAULT_FLAG_USER;
     int code;
     code = SEGV_MAPERR;
@@ -402,11 +407,12 @@ static void force_user_fault(unsigned long address, int write)
     if (write) {
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
             goto bad_area;
     }
-    switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
+    switch (handle_mm_fault(mm, vma, address, flags)) {
     case VM_FAULT_SIGBUS:
     case VM_FAULT_OOM:
         goto do_sigbus;

View File

@@ -315,7 +315,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
             bad_kernel_pc(regs, address);
             return;
         }
-    }
+    } else
+        flags |= FAULT_FLAG_USER;
     /*
      * If we're in an interrupt or have no user
@@ -418,13 +419,14 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
             vma->vm_file != NULL)
             set_thread_fault_code(fault_code |
                           FAULT_CODE_BLKCOMMIT);
+        flags |= FAULT_FLAG_WRITE;
     } else {
         /* Allow reads even for write-only mappings */
         if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
             goto bad_area;
     }
-    flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
     fault = handle_mm_fault(mm, vma, address, flags);
     if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))

View File

@@ -280,8 +280,7 @@ static int handle_page_fault(struct pt_regs *regs,
     if (!is_page_fault)
         write = 1;
-    flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-         (write ? FAULT_FLAG_WRITE : 0));
+    flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     is_kernel_mode = !user_mode(regs);
@@ -365,6 +364,9 @@ static int handle_page_fault(struct pt_regs *regs,
         goto bad_area_nosemaphore;
     }
+    if (!is_kernel_mode)
+        flags |= FAULT_FLAG_USER;
     /*
      * When running in the kernel we expect faults to occur only to
      * addresses in user space. All other faults represent errors in the
@@ -425,12 +427,12 @@ static int handle_page_fault(struct pt_regs *regs,
 #endif
         if (!(vma->vm_flags & VM_WRITE))
             goto bad_area;
+        flags |= FAULT_FLAG_WRITE;
     } else {
         if (!is_page_fault || !(vma->vm_flags & VM_READ))
             goto bad_area;
     }
-survive:
     /*
      * If for any reason at all we couldn't handle the fault,
      * make sure we exit gracefully rather than endlessly redo
@@ -555,11 +557,6 @@ static int handle_page_fault(struct pt_regs *regs,
      */
 out_of_memory:
     up_read(&mm->mmap_sem);
-    if (is_global_init(tsk)) {
-        yield();
-        down_read(&mm->mmap_sem);
-        goto survive;
-    }
     if (is_kernel_mode)
         goto no_context;
     pagefault_out_of_memory();

View File

@@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
     pmd_t *pmd;
     pte_t *pte;
     int err = -EFAULT;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                 (is_write ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     *code_out = SEGV_MAPERR;
@@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
     if (in_atomic())
         goto out_nosemaphore;
+    if (is_user)
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);
@@ -58,12 +59,15 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 good_area:
     *code_out = SEGV_ACCERR;
-    if (is_write && !(vma->vm_flags & VM_WRITE))
-        goto out;
-    /* Don't require VM_READ|VM_EXEC for write faults! */
-    if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
-        goto out;
+    if (is_write) {
+        if (!(vma->vm_flags & VM_WRITE))
+            goto out;
+        flags |= FAULT_FLAG_WRITE;
+    } else {
+        /* Don't require VM_READ|VM_EXEC for write faults! */
+        if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+            goto out;
+    }
     do {
         int fault;
@@ -124,6 +128,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
      * (which will retry the fault, or kill us if we got oom-killed).
      */
     up_read(&mm->mmap_sem);
+    if (!is_user)
+        goto out_nosemaphore;
     pagefault_out_of_memory();
     return 0;
 }

View File

@@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
     struct task_struct *tsk;
     struct mm_struct *mm;
     int fault, sig, code;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     tsk = current;
     mm = tsk->mm;
@@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
     if (in_atomic() || !mm)
         goto no_context;
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
+    if (!(fsr ^ 0x12))
+        flags |= FAULT_FLAG_WRITE;
     /*
      * As per x86, we may deadlock here. However, since the kernel only
      * validly references user space from well defined areas of the code,
@@ -278,6 +282,13 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
                   (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
         return 0;
+    /*
+     * If we are in kernel mode at this point, we
+     * have no context to handle this fault with.
+     */
+    if (!user_mode(regs))
+        goto no_context;
     if (fault & VM_FAULT_OOM) {
         /*
          * We ran out of memory, call the OOM killer, and return to
@@ -288,13 +299,6 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         return 0;
     }
-    /*
-     * If we are in kernel mode at this point, we
-     * have no context to handle this fault with.
-     */
-    if (!user_mode(regs))
-        goto no_context;
     if (fault & VM_FAULT_SIGBUS) {
         /*
          * We had some memory, but were unable to

View File

@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
     force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }
-static noinline int
+static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
            unsigned long address, unsigned int fault)
 {
-    /*
-     * Pagefault was interrupted by SIGKILL. We have no reason to
-     * continue pagefault.
-     */
-    if (fatal_signal_pending(current)) {
-        if (!(fault & VM_FAULT_RETRY))
-            up_read(&current->mm->mmap_sem);
-        if (!(error_code & PF_USER))
-            no_context(regs, error_code, address, 0, 0);
-        return 1;
+    if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+        up_read(&current->mm->mmap_sem);
+        no_context(regs, error_code, address, 0, 0);
+        return;
     }
-    if (!(fault & VM_FAULT_ERROR))
-        return 0;
     if (fault & VM_FAULT_OOM) {
         /* Kernel mode? Handle exceptions or die: */
@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
             up_read(&current->mm->mmap_sem);
             no_context(regs, error_code, address,
                    SIGSEGV, SEGV_MAPERR);
-            return 1;
+            return;
         }
         up_read(&current->mm->mmap_sem);
@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
         else
             BUG();
     }
-    return 1;
 }
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
@@ -1011,9 +1002,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
     unsigned long address;
     struct mm_struct *mm;
     int fault;
-    int write = error_code & PF_WRITE;
-    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                    (write ? FAULT_FLAG_WRITE : 0);
+    unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
     tsk = current;
     mm = tsk->mm;
@@ -1083,6 +1072,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
     if (user_mode_vm(regs)) {
         local_irq_enable();
         error_code |= PF_USER;
+        flags |= FAULT_FLAG_USER;
     } else {
         if (regs->flags & X86_EFLAGS_IF)
             local_irq_enable();
@@ -1109,6 +1099,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
         return;
     }
+    if (error_code & PF_WRITE)
+        flags |= FAULT_FLAG_WRITE;
     /*
      * When running in the kernel we expect faults to occur only to
      * addresses in user space. All other faults represent errors in
@@ -1187,9 +1180,17 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
      */
     fault = handle_mm_fault(mm, vma, address, flags);
-    if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
-        if (mm_fault_error(regs, error_code, address, fault))
-            return;
+    /*
+     * If we need to retry but a fatal signal is pending, handle the
+     * signal first. We do not need to release the mmap_sem because it
+     * would already be released in __lock_page_or_retry in mm/filemap.c.
+     */
+    if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
+        return;
+    if (unlikely(fault & VM_FAULT_ERROR)) {
+        mm_fault_error(regs, error_code, address, fault);
+        return;
     }
     /*

View File

@@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs)
            address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
 #endif
+    if (user_mode(regs))
+        flags |= FAULT_FLAG_USER;
 retry:
     down_read(&mm->mmap_sem);
     vma = find_vma(mm, address);

View File

@@ -125,13 +125,7 @@ static ssize_t node_read_meminfo(struct device *dev,
            nid, K(node_page_state(nid, NR_WRITEBACK)),
            nid, K(node_page_state(nid, NR_FILE_PAGES)),
            nid, K(node_page_state(nid, NR_FILE_MAPPED)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-           nid, K(node_page_state(nid, NR_ANON_PAGES)
-               + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
-               HPAGE_PMD_NR),
-#else
            nid, K(node_page_state(nid, NR_ANON_PAGES)),
-#endif
            nid, K(node_page_state(nid, NR_SHMEM)),
            nid, node_page_state(nid, NR_KERNEL_STACK) *
                THREAD_SIZE / 1024,

View File

@@ -50,7 +50,7 @@ static void adfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size)
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
 }
 static int adfs_write_begin(struct file *file, struct address_space *mapping,
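
Every filesystem hunk that follows is the same mechanical update for the interface change listed above ("truncate: drop 'oldsize' truncate_pagecache() parameter"). Roughly, and with parameter names approximated rather than quoted from mm.h:

    /* before this series (sketch) */
    void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize);
    /* after: callers no longer pass the old size in */
    void truncate_pagecache(struct inode *inode, loff_t newsize);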

View File

@@ -406,7 +406,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         affs_truncate(inode);
     }
 }

View File

@@ -166,7 +166,7 @@ static void bfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size)
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
 }
 static int bfs_write_begin(struct file *file, struct address_space *mapping,

View File

@@ -221,12 +221,10 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                     struct btrfs_path *path,
                     struct inode *inode)
 {
-    loff_t oldsize;
     int ret = 0;
-    oldsize = i_size_read(inode);
     btrfs_i_size_write(inode, 0);
-    truncate_pagecache(inode, oldsize, 0);
+    truncate_pagecache(inode, 0);
     /*
      * We don't need an orphan item because truncating the free space cache

View File

@@ -4349,7 +4349,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
         inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
         if (newsize > oldsize) {
-            truncate_pagecache(inode, oldsize, newsize);
+            truncate_pagecache(inode, newsize);
             ret = btrfs_cont_expand(inode, oldsize, newsize);
             if (ret)
                 return ret;

View File

@@ -1856,14 +1856,11 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 static void cifs_setsize(struct inode *inode, loff_t offset)
 {
-    loff_t oldsize;
     spin_lock(&inode->i_lock);
-    oldsize = inode->i_size;
     i_size_write(inode, offset);
     spin_unlock(&inode->i_lock);
-    truncate_pagecache(inode, oldsize, offset);
+    truncate_pagecache(inode, offset);
 }
 static int

View File

@@ -861,7 +861,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
 static void _write_failed(struct inode *inode, loff_t to)
 {
     if (to > inode->i_size)
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
 }
 int exofs_write_begin(struct file *file, struct address_space *mapping,

View File

@@ -58,7 +58,7 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         ext2_truncate_blocks(inode, inode->i_size);
     }
 }

View File

@@ -4587,7 +4587,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
     if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
         handle_t *handle;
-        loff_t oldsize = inode->i_size;
         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
             struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4650,7 +4649,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
              * Truncate pagecache after we've waited for commit
              * in data=journal mode to make pages freeable.
              */
-            truncate_pagecache(inode, oldsize, inode->i_size);
+            truncate_pagecache(inode, inode->i_size);
         }
         /*
          * We want to call ext4_truncate() even if attr->ia_size ==

View File

@@ -147,7 +147,7 @@ static void fat_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         fat_truncate_blocks(inode, inode->i_size);
     }
 }

View File

@@ -1678,7 +1678,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
      * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
      */
     if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-        truncate_pagecache(inode, oldsize, outarg.attr.size);
+        truncate_pagecache(inode, outarg.attr.size);
         invalidate_inode_pages2(inode->i_mapping);
     }

View File

@@ -218,7 +218,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
     bool inval = false;
     if (oldsize != attr->size) {
-        truncate_pagecache(inode, oldsize, attr->size);
+        truncate_pagecache(inode, attr->size);
         inval = true;
     } else if (fc->auto_inval_data) {
         struct timespec new_mtime = {

View File

@@ -1016,7 +1016,7 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize
         chunk = oldsize - newsize;
         if (chunk > max_chunk)
             chunk = max_chunk;
-        truncate_pagecache(inode, oldsize, oldsize - chunk);
+        truncate_pagecache(inode, oldsize - chunk);
         oldsize -= chunk;
         gfs2_trans_end(sdp);
         error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
@@ -1067,7 +1067,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
     if (journaled)
         error = gfs2_journaled_truncate(inode, oldsize, newsize);
     else
-        truncate_pagecache(inode, oldsize, newsize);
+        truncate_pagecache(inode, newsize);
     if (error) {
         brelse(dibh);

View File

@@ -41,7 +41,7 @@ static void hfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         hfs_file_truncate(inode);
     }
 }

View File

@@ -36,7 +36,7 @@ static void hfsplus_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         hfsplus_file_truncate(inode);
     }
 }

View File

@@ -138,7 +138,7 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
     hpfs_lock(inode->i_sb);
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         hpfs_truncate(inode);
     }

View File

@@ -306,7 +306,7 @@ static void jfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         jfs_truncate(inode);
     }
 }

View File

@@ -400,7 +400,7 @@ static void minix_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         minix_truncate(inode);
     }
 }

View File

@@ -541,7 +541,6 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
-    loff_t oldsize;
     int err;
     err = inode_newsize_ok(inode, offset);
@@ -549,11 +548,10 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
         goto out;
     spin_lock(&inode->i_lock);
-    oldsize = inode->i_size;
     i_size_write(inode, offset);
     spin_unlock(&inode->i_lock);
-    truncate_pagecache(inode, oldsize, offset);
+    truncate_pagecache(inode, offset);
 out:
     return err;
 }

View File

@@ -254,7 +254,7 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         nilfs_truncate(inode);
     }
 }

View File

@@ -1768,7 +1768,7 @@ static void ntfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         ntfs_truncate_vfs(inode);
     }
 }

View File

@@ -311,7 +311,7 @@ static void omfs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         omfs_truncate(inode);
     }
 }

View File

@@ -132,13 +132,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
         K(i.freeswap),
         K(global_page_state(NR_FILE_DIRTY)),
         K(global_page_state(NR_WRITEBACK)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-        K(global_page_state(NR_ANON_PAGES)
-          + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
-          HPAGE_PMD_NR),
-#else
         K(global_page_state(NR_ANON_PAGES)),
-#endif
         K(global_page_state(NR_FILE_MAPPED)),
         K(global_page_state(NR_SHMEM)),
         K(global_page_state(NR_SLAB_RECLAIMABLE) +

View File

@@ -469,7 +469,7 @@ static void sysv_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size) {
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
         sysv_truncate(inode);
     }
 }

View File

@@ -172,7 +172,7 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
     loff_t isize = inode->i_size;
     if (to > isize) {
-        truncate_pagecache(inode, to, isize);
+        truncate_pagecache(inode, isize);
         if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
             down_write(&iinfo->i_data_sem);
             udf_clear_extent_cache(inode);

View File

@@ -531,7 +531,7 @@ static void ufs_write_failed(struct address_space *mapping, loff_t to)
     struct inode *inode = mapping->host;
     if (to > inode->i_size)
-        truncate_pagecache(inode, to, inode->i_size);
+        truncate_pagecache(inode, inode->i_size);
 }
 static int ufs_write_begin(struct file *file, struct address_space *mapping,

View File

@@ -1582,7 +1582,7 @@ xfs_vm_write_begin(
         unlock_page(page);
         if (pos + len > i_size_read(inode))
-            truncate_pagecache(inode, pos + len, i_size_read(inode));
+            truncate_pagecache(inode, i_size_read(inode));
         page_cache_release(page);
         page = NULL;
@@ -1618,7 +1618,7 @@ xfs_vm_write_end(
         loff_t to = pos + len;
         if (to > isize) {
-            truncate_pagecache(inode, to, isize);
+            truncate_pagecache(inode, isize);
             xfs_vm_kill_delalloc_range(inode, isize, to);
         }
     }

View File

@@ -96,9 +96,6 @@ extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
             pmd_t *dst_pmd, pmd_t *src_pmd,
             struct vm_area_struct *vma,
             unsigned long addr, unsigned long end);
-extern int handle_pte_fault(struct mm_struct *mm,
-            struct vm_area_struct *vma, unsigned long address,
-            pte_t *pte, pmd_t *pmd, unsigned int flags);
 extern int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {


@ -30,9 +30,21 @@ struct page;
struct mm_struct; struct mm_struct;
struct kmem_cache; struct kmem_cache;
/* Stats that can be updated by kernel. */ /*
enum mem_cgroup_page_stat_item { * The corresponding mem_cgroup_stat_names is defined in mm/memcontrol.c,
MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */ * These two lists should keep in accord with each other.
*/
enum mem_cgroup_stat_index {
/*
* For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
*/
MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */
MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */
MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */
MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */
MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */
MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */
MEM_CGROUP_STAT_NSTATS,
}; };
struct mem_cgroup_reclaim_cookie { struct mem_cgroup_reclaim_cookie {
@ -41,6 +53,23 @@ struct mem_cgroup_reclaim_cookie {
unsigned int generation; unsigned int generation;
}; };
enum mem_cgroup_filter_t {
VISIT, /* visit current node */
SKIP, /* skip the current node and continue traversal */
SKIP_TREE, /* skip the whole subtree and continue traversal */
};
/*
* mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
* iterate through the hierarchy tree. Each tree element is checked by the
* predicate before it is returned by the iterator. If a filter returns
* SKIP or SKIP_TREE then the iterator code continues traversal (with the
* next node down the hierarchy or the next node that doesn't belong under the
* memcg's subtree).
*/
typedef enum mem_cgroup_filter_t
(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
#ifdef CONFIG_MEMCG #ifdef CONFIG_MEMCG
/* /*
* All "charge" functions with gfp_mask should use GFP_KERNEL or * All "charge" functions with gfp_mask should use GFP_KERNEL or
@ -108,9 +137,18 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok); struct page *oldpage, struct page *newpage, bool migration_ok);
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
struct mem_cgroup *, struct mem_cgroup *prev,
struct mem_cgroup_reclaim_cookie *); struct mem_cgroup_reclaim_cookie *reclaim,
mem_cgroup_iter_filter cond);
static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
struct mem_cgroup_reclaim_cookie *reclaim)
{
return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
}
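
A hedged sketch of how the conditional iterator is meant to be driven, mirroring the loop mm/vmscan.c adopts later in this diff; example_filter() and example_walk() are illustrative names and CONFIG_MEMCG is assumed:

    /* Sketch: walk a memcg hierarchy, letting a predicate prune subtrees. */
    static enum mem_cgroup_filter_t example_filter(struct mem_cgroup *memcg,
                                                   struct mem_cgroup *root)
    {
        return VISIT;   /* could return SKIP or SKIP_TREE to prune */
    }

    static void example_walk(struct mem_cgroup *root,
                             struct mem_cgroup_reclaim_cookie *reclaim)
    {
        struct mem_cgroup *memcg = NULL;

        /* a NULL 'cond' degrades to plain mem_cgroup_iter() behaviour */
        while ((memcg = mem_cgroup_iter_cond(root, memcg, reclaim,
                                             example_filter))) {
            /* operate on memcg */
        }
    }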
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
/* /*
@ -125,6 +163,48 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
extern void mem_cgroup_replace_page_cache(struct page *oldpage, extern void mem_cgroup_replace_page_cache(struct page *oldpage,
struct page *newpage); struct page *newpage);
/**
* mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
* @new: true to enable, false to disable
*
* Toggle whether a failed memcg charge should invoke the OOM killer
* or just return -ENOMEM. Returns the previous toggle state.
*
* NOTE: Any path that enables the OOM killer before charging must
* call mem_cgroup_oom_synchronize() afterward to finalize the
* OOM handling and clean up.
*/
static inline bool mem_cgroup_toggle_oom(bool new)
{
bool old;
old = current->memcg_oom.may_oom;
current->memcg_oom.may_oom = new;
return old;
}
static inline void mem_cgroup_enable_oom(void)
{
bool old = mem_cgroup_toggle_oom(true);
WARN_ON(old == true);
}
static inline void mem_cgroup_disable_oom(void)
{
bool old = mem_cgroup_toggle_oom(false);
WARN_ON(old == false);
}
static inline bool task_in_memcg_oom(struct task_struct *p)
{
return p->memcg_oom.in_memcg_oom;
}
bool mem_cgroup_oom_synchronize(void);
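
Distilled from the mm/filemap.c and mm/memory.c hunks later in this diff, a hedged sketch of the calling convention: optional work such as readahead runs with the memcg OOM killer suppressed, while whoever enables it must synchronize afterwards. example_optional_work() is an illustrative name:

    /* Sketch: suppress memcg OOM around work whose failure is tolerable. */
    static void example_optional_work(void)
    {
        bool memcg_oom = mem_cgroup_toggle_oom(false);

        /* ... charge-heavy but optional work, e.g. readahead ... */

        mem_cgroup_toggle_oom(memcg_oom);   /* restore the previous state */
    }

Paths that call mem_cgroup_enable_oom() instead are expected to pair it with mem_cgroup_disable_oom() and, when a memcg OOM is left pending, with mem_cgroup_oom_synchronize(), as the reworked handle_mm_fault() does further down.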
#ifdef CONFIG_MEMCG_SWAP #ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account; extern int do_swap_account;
#endif #endif
@ -165,24 +245,24 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
} }
void mem_cgroup_update_page_stat(struct page *page, void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx, enum mem_cgroup_stat_index idx,
int val); int val);
static inline void mem_cgroup_inc_page_stat(struct page *page, static inline void mem_cgroup_inc_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx) enum mem_cgroup_stat_index idx)
{ {
mem_cgroup_update_page_stat(page, idx, 1); mem_cgroup_update_page_stat(page, idx, 1);
} }
static inline void mem_cgroup_dec_page_stat(struct page *page, static inline void mem_cgroup_dec_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx) enum mem_cgroup_stat_index idx)
{ {
mem_cgroup_update_page_stat(page, idx, -1); mem_cgroup_update_page_stat(page, idx, -1);
} }
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, enum mem_cgroup_filter_t
gfp_t gfp_mask, mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
unsigned long *total_scanned); struct mem_cgroup *root);
void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@ -296,6 +376,15 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
struct page *oldpage, struct page *newpage, bool migration_ok) struct page *oldpage, struct page *newpage, bool migration_ok)
{ {
} }
static inline struct mem_cgroup *
mem_cgroup_iter_cond(struct mem_cgroup *root,
struct mem_cgroup *prev,
struct mem_cgroup_reclaim_cookie *reclaim,
mem_cgroup_iter_filter cond)
{
/* first call must return non-NULL, second return NULL */
return (struct mem_cgroup *)(unsigned long)!prev;
}
static inline struct mem_cgroup * static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root, mem_cgroup_iter(struct mem_cgroup *root,
@ -348,22 +437,45 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
{ {
} }
static inline bool mem_cgroup_toggle_oom(bool new)
{
return false;
}
static inline void mem_cgroup_enable_oom(void)
{
}
static inline void mem_cgroup_disable_oom(void)
{
}
static inline bool task_in_memcg_oom(struct task_struct *p)
{
return false;
}
static inline bool mem_cgroup_oom_synchronize(void)
{
return false;
}
static inline void mem_cgroup_inc_page_stat(struct page *page, static inline void mem_cgroup_inc_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx) enum mem_cgroup_stat_index idx)
{ {
} }
static inline void mem_cgroup_dec_page_stat(struct page *page, static inline void mem_cgroup_dec_page_stat(struct page *page,
enum mem_cgroup_page_stat_item idx) enum mem_cgroup_stat_index idx)
{ {
} }
static inline static inline
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, enum mem_cgroup_filter_t
gfp_t gfp_mask, mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
unsigned long *total_scanned) struct mem_cgroup *root)
{ {
return 0; return VISIT;
} }
static inline void mem_cgroup_split_huge_fixup(struct page *head) static inline void mem_cgroup_split_huge_fixup(struct page *head)


@ -176,6 +176,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
#define FAULT_FLAG_TRIED 0x40 /* second try */ #define FAULT_FLAG_TRIED 0x40 /* second try */
#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
/* /*
* vm_fault is filled by the the pagefault handler and passed to the vma's * vm_fault is filled by the the pagefault handler and passed to the vma's
@ -876,11 +877,12 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */
#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */
#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
VM_FAULT_HWPOISON_LARGE) VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE)
/* Encode hstate index for a hwpoisoned large page */ /* Encode hstate index for a hwpoisoned large page */
#define VM_FAULT_SET_HINDEX(x) ((x) << 12) #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
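
VM_FAULT_FALLBACK gives huge-page fault paths a way to say "retry this fault with small pages" instead of failing outright; mm/memory.c consumes it further down in this diff. A hedged sketch of a producer, with example_* names as placeholders:

    /* Sketch: a huge-page fault handler signalling "use the normal pte path". */
    static int example_huge_fault(struct vm_area_struct *vma, unsigned long addr)
    {
        if (!example_can_map_huge(vma, addr))   /* hypothetical suitability check */
            return VM_FAULT_FALLBACK;           /* caller falls back to small pages */

        /* ... instantiate the huge mapping ... */
        return 0;
    }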
@ -984,7 +986,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
unmap_mapping_range(mapping, holebegin, holelen, 0); unmap_mapping_range(mapping, holebegin, holelen, 0);
} }
extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize); extern void truncate_setsize(struct inode *inode, loff_t newsize);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page); int truncate_inode_page(struct address_space *mapping, struct page *page);


@ -54,7 +54,7 @@ struct res_counter {
struct res_counter *parent; struct res_counter *parent;
}; };
#define RESOURCE_MAX (unsigned long long)LLONG_MAX #define RES_COUNTER_MAX ULLONG_MAX
/** /**
* Helpers to interact with userspace * Helpers to interact with userspace


@ -1393,6 +1393,13 @@ struct task_struct {
unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
} memcg_batch; } memcg_batch;
unsigned int memcg_kmem_skip_account; unsigned int memcg_kmem_skip_account;
struct memcg_oom_info {
unsigned int may_oom:1;
unsigned int in_memcg_oom:1;
unsigned int oom_locked:1;
int wakeups;
struct mem_cgroup *wait_on_memcg;
} memcg_oom;
#endif #endif
#ifdef CONFIG_UPROBES #ifdef CONFIG_UPROBES
struct uprobe_task *utask; struct uprobe_task *utask;


@ -280,7 +280,7 @@ extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *); extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void); extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu(int cpu);
extern int lru_add_drain_all(void); extern void lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page); extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_page(struct page *page); extern void deactivate_page(struct page *page);
extern void swap_setup(void); extern void swap_setup(void);


@ -74,7 +74,7 @@ static int __init gcov_persist_setup(char *str)
{ {
unsigned long val; unsigned long val;
if (strict_strtoul(str, 0, &val)) { if (kstrtoul(str, 0, &val)) {
pr_warning("invalid gcov_persist parameter '%s'\n", str); pr_warning("invalid gcov_persist parameter '%s'\n", str);
return 0; return 0;
} }
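
kstrtoul() keeps the contract of the strict_strtoul() it replaces throughout this series, 0 on success and a negative errno on malformed input, so call sites only need the name swap. A minimal sketch; example_parse() is illustrative:

    /* Sketch: parse an unsigned long with the kstrto*() family. */
    static int example_parse(const char *str, unsigned long *out)
    {
        int err = kstrtoul(str, 0, out);    /* base 0 honours 0x/0 prefixes */

        if (err)
            pr_warn("invalid value '%s'\n", str);
        return err;
    }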


@ -113,7 +113,7 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj,
unsigned long cnt; unsigned long cnt;
int ret; int ret;
if (strict_strtoul(buf, 0, &cnt)) if (kstrtoul(buf, 0, &cnt))
return -EINVAL; return -EINVAL;
ret = crash_shrink_memory(cnt); ret = crash_shrink_memory(cnt);


@ -253,13 +253,13 @@ int parse_args(const char *doing,
EXPORT_SYMBOL(param_ops_##name) EXPORT_SYMBOL(param_ops_##name)
STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol); STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtoul);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol); STANDARD_PARAM_DEF(int, int, "%i", long, kstrtoul);
STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol); STANDARD_PARAM_DEF(long, long, "%li", long, kstrtoul);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul); STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul);
int param_set_charp(const char *val, const struct kernel_param *kp) int param_set_charp(const char *val, const struct kernel_param *kp)
{ {


@ -17,8 +17,8 @@
void res_counter_init(struct res_counter *counter, struct res_counter *parent) void res_counter_init(struct res_counter *counter, struct res_counter *parent)
{ {
spin_lock_init(&counter->lock); spin_lock_init(&counter->lock);
counter->limit = RESOURCE_MAX; counter->limit = RES_COUNTER_MAX;
counter->soft_limit = RESOURCE_MAX; counter->soft_limit = RES_COUNTER_MAX;
counter->parent = parent; counter->parent = parent;
} }
@ -178,23 +178,30 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
#endif #endif
int res_counter_memparse_write_strategy(const char *buf, int res_counter_memparse_write_strategy(const char *buf,
unsigned long long *res) unsigned long long *resp)
{ {
char *end; char *end;
unsigned long long res;
/* return RESOURCE_MAX(unlimited) if "-1" is specified */ /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */
if (*buf == '-') { if (*buf == '-') {
*res = simple_strtoull(buf + 1, &end, 10); res = simple_strtoull(buf + 1, &end, 10);
if (*res != 1 || *end != '\0') if (res != 1 || *end != '\0')
return -EINVAL; return -EINVAL;
*res = RESOURCE_MAX; *resp = RES_COUNTER_MAX;
return 0; return 0;
} }
*res = memparse(buf, &end); res = memparse(buf, &end);
if (*end != '\0') if (*end != '\0')
return -EINVAL; return -EINVAL;
*res = PAGE_ALIGN(*res); if (PAGE_ALIGN(res) >= res)
res = PAGE_ALIGN(res);
else
res = RES_COUNTER_MAX;
*resp = res;
return 0; return 0;
} }
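
With the rewrite above, the parsed value is staged in a local so the caller-visible result is always well formed: "-1" selects RES_COUNTER_MAX, and a PAGE_ALIGN() that would wrap now saturates to RES_COUNTER_MAX instead of overflowing. A hedged sketch of a write handler built on it; example_set_limit() is illustrative:

    /* Sketch: accept "-1", "4k", "512M", ... and apply the parsed limit. */
    static int example_set_limit(struct res_counter *counter, const char *buf)
    {
        unsigned long long limit;
        int ret;

        ret = res_counter_memparse_write_strategy(buf, &limit);
        if (ret)
            return ret;
        /* limit is page aligned, or RES_COUNTER_MAX for "-1" and overflow */
        return res_counter_set_limit(counter, limit);
    }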


@ -245,7 +245,7 @@ config COMPACTION
config MIGRATION config MIGRATION
bool "Page migration" bool "Page migration"
def_bool y def_bool y
depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA depends on (NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA) && MMU
help help
Allows the migration of the physical location of pages of processes Allows the migration of the physical location of pages of processes
while the virtual addresses are not changed. This is useful in while the virtual addresses are not changed. This is useful in
@ -480,7 +480,7 @@ config FRONTSWAP
config CMA config CMA
bool "Contiguous Memory Allocator" bool "Contiguous Memory Allocator"
depends on HAVE_MEMBLOCK depends on HAVE_MEMBLOCK && MMU
select MIGRATION select MIGRATION
select MEMORY_ISOLATION select MEMORY_ISOLATION
help help


@ -467,32 +467,34 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
error = mem_cgroup_cache_charge(page, current->mm, error = mem_cgroup_cache_charge(page, current->mm,
gfp_mask & GFP_RECLAIM_MASK); gfp_mask & GFP_RECLAIM_MASK);
if (error) if (error)
goto out; return error;
error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) { if (error) {
page_cache_get(page);
page->mapping = mapping;
page->index = offset;
spin_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
trace_mm_filemap_add_to_page_cache(page);
} else {
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
page_cache_release(page);
}
radix_tree_preload_end();
} else
mem_cgroup_uncharge_cache_page(page); mem_cgroup_uncharge_cache_page(page);
out: return error;
}
page_cache_get(page);
page->mapping = mapping;
page->index = offset;
spin_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
radix_tree_preload_end();
if (unlikely(error))
goto err_insert;
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
trace_mm_filemap_add_to_page_cache(page);
return 0;
err_insert:
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
spin_unlock_irq(&mapping->tree_lock);
mem_cgroup_uncharge_cache_page(page);
page_cache_release(page);
return error; return error;
} }
EXPORT_SYMBOL(add_to_page_cache_locked); EXPORT_SYMBOL(add_to_page_cache_locked);
@ -1614,6 +1616,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
pgoff_t offset = vmf->pgoff; pgoff_t offset = vmf->pgoff;
struct page *page; struct page *page;
bool memcg_oom;
pgoff_t size; pgoff_t size;
int ret = 0; int ret = 0;
@ -1622,7 +1625,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
/* /*
* Do we have something in the page cache already? * Do we have something in the page cache already? Either
* way, try readahead, but disable the memcg OOM killer for it
* as readahead is optional and no errors are propagated up
* the fault stack. The OOM killer is enabled while trying to
* instantiate the faulting page individually below.
*/ */
page = find_get_page(mapping, offset); page = find_get_page(mapping, offset);
if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@ -1630,10 +1637,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
* We found the page, so try async readahead before * We found the page, so try async readahead before
* waiting for the lock. * waiting for the lock.
*/ */
memcg_oom = mem_cgroup_toggle_oom(false);
do_async_mmap_readahead(vma, ra, file, page, offset); do_async_mmap_readahead(vma, ra, file, page, offset);
mem_cgroup_toggle_oom(memcg_oom);
} else if (!page) { } else if (!page) {
/* No page in the page cache at all */ /* No page in the page cache at all */
memcg_oom = mem_cgroup_toggle_oom(false);
do_sync_mmap_readahead(vma, ra, file, offset); do_sync_mmap_readahead(vma, ra, file, offset);
mem_cgroup_toggle_oom(memcg_oom);
count_vm_event(PGMAJFAULT); count_vm_event(PGMAJFAULT);
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
ret = VM_FAULT_MAJOR; ret = VM_FAULT_MAJOR;


@ -695,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
return pmd; return pmd;
} }
static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma) static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
{ {
pmd_t entry; pmd_t entry;
entry = mk_pmd(page, vma->vm_page_prot); entry = mk_pmd(page, prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
entry = pmd_mkhuge(entry); entry = pmd_mkhuge(entry);
return entry; return entry;
} }
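
After this split, mk_huge_pmd() only builds the huge entry from a protection value; dirtying and write permission are applied separately at each call site. A sketch of the resulting two-step idiom inside mm/huge_memory.c (the helper name is illustrative, the commit open-codes the two steps):

    /* Sketch: build a huge PMD, then grant write access where the VMA allows. */
    static pmd_t example_mk_writable_huge_pmd(struct page *page,
                                              struct vm_area_struct *vma)
    {
        pmd_t entry = mk_huge_pmd(page, vma->vm_page_prot);

        return maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
    }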
@ -732,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
pte_free(mm, pgtable); pte_free(mm, pgtable);
} else { } else {
pmd_t entry; pmd_t entry;
entry = mk_huge_pmd(page, vma); entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr); page_add_new_anon_rmap(page, vma, haddr);
pgtable_trans_huge_deposit(mm, pmd, pgtable); pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry); set_pmd_at(mm, haddr, pmd, entry);
@ -788,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
{ {
struct page *page; struct page *page;
unsigned long haddr = address & HPAGE_PMD_MASK; unsigned long haddr = address & HPAGE_PMD_MASK;
pte_t *pte;
if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) { if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
if (unlikely(anon_vma_prepare(vma))) return VM_FAULT_FALLBACK;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma)))
return VM_FAULT_OOM;
if (!(flags & FAULT_FLAG_WRITE) &&
transparent_hugepage_use_zero_page()) {
pgtable_t pgtable;
struct page *zero_page;
bool set;
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable))
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma))) zero_page = get_huge_zero_page();
return VM_FAULT_OOM; if (unlikely(!zero_page)) {
if (!(flags & FAULT_FLAG_WRITE) && pte_free(mm, pgtable);
transparent_hugepage_use_zero_page()) {
pgtable_t pgtable;
struct page *zero_page;
bool set;
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
zero_page = get_huge_zero_page();
if (unlikely(!zero_page)) {
pte_free(mm, pgtable);
count_vm_event(THP_FAULT_FALLBACK);
goto out;
}
spin_lock(&mm->page_table_lock);
set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
zero_page);
spin_unlock(&mm->page_table_lock);
if (!set) {
pte_free(mm, pgtable);
put_huge_zero_page();
}
return 0;
}
page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
vma, haddr, numa_node_id(), 0);
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
goto out; return VM_FAULT_FALLBACK;
} }
count_vm_event(THP_FAULT_ALLOC); spin_lock(&mm->page_table_lock);
if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
put_page(page); zero_page);
goto out; spin_unlock(&mm->page_table_lock);
if (!set) {
pte_free(mm, pgtable);
put_huge_zero_page();
} }
if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
page))) {
mem_cgroup_uncharge_page(page);
put_page(page);
goto out;
}
return 0; return 0;
} }
out: page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
/* vma, haddr, numa_node_id(), 0);
* Use __pte_alloc instead of pte_alloc_map, because we can't if (unlikely(!page)) {
* run pte_offset_map on the pmd, if an huge pmd could count_vm_event(THP_FAULT_FALLBACK);
* materialize from under us from a different thread. return VM_FAULT_FALLBACK;
*/ }
if (unlikely(pmd_none(*pmd)) && if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
unlikely(__pte_alloc(mm, vma, pmd, address))) put_page(page);
return VM_FAULT_OOM; count_vm_event(THP_FAULT_FALLBACK);
/* if an huge pmd materialized from under us just retry later */ return VM_FAULT_FALLBACK;
if (unlikely(pmd_trans_huge(*pmd))) }
return 0; if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
/* mem_cgroup_uncharge_page(page);
* A regular pmd is established and it can't morph into a huge pmd put_page(page);
* from under us anymore at this point because we hold the mmap_sem count_vm_event(THP_FAULT_FALLBACK);
* read mode and khugepaged takes it in write mode. So now it's return VM_FAULT_FALLBACK;
* safe to run pte_offset_map(). }
*/
pte = pte_offset_map(pmd, address); count_vm_event(THP_FAULT_ALLOC);
return handle_pte_fault(mm, vma, address, pte, pmd, flags); return 0;
} }
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@ -1170,7 +1150,6 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
new_page = NULL; new_page = NULL;
if (unlikely(!new_page)) { if (unlikely(!new_page)) {
count_vm_event(THP_FAULT_FALLBACK);
if (is_huge_zero_pmd(orig_pmd)) { if (is_huge_zero_pmd(orig_pmd)) {
ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
address, pmd, orig_pmd, haddr); address, pmd, orig_pmd, haddr);
@ -1181,9 +1160,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
split_huge_page(page); split_huge_page(page);
put_page(page); put_page(page);
} }
count_vm_event(THP_FAULT_FALLBACK);
goto out; goto out;
} }
count_vm_event(THP_FAULT_ALLOC);
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
put_page(new_page); put_page(new_page);
@ -1191,10 +1170,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
split_huge_page(page); split_huge_page(page);
put_page(page); put_page(page);
} }
count_vm_event(THP_FAULT_FALLBACK);
ret |= VM_FAULT_OOM; ret |= VM_FAULT_OOM;
goto out; goto out;
} }
count_vm_event(THP_FAULT_ALLOC);
if (is_huge_zero_pmd(orig_pmd)) if (is_huge_zero_pmd(orig_pmd))
clear_huge_page(new_page, haddr, HPAGE_PMD_NR); clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
else else
@ -1215,7 +1197,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_mn; goto out_mn;
} else { } else {
pmd_t entry; pmd_t entry;
entry = mk_huge_pmd(new_page, vma); entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
pmdp_clear_flush(vma, haddr, pmd); pmdp_clear_flush(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr); page_add_new_anon_rmap(new_page, vma, haddr);
set_pmd_at(mm, haddr, pmd, entry); set_pmd_at(mm, haddr, pmd, entry);
@ -1666,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page,
BUG_ON(atomic_read(&page->_count) <= 0); BUG_ON(atomic_read(&page->_count) <= 0);
__mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
ClearPageCompound(page); ClearPageCompound(page);
compound_unlock(page); compound_unlock(page);
@ -2364,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm,
__SetPageUptodate(new_page); __SetPageUptodate(new_page);
pgtable = pmd_pgtable(_pmd); pgtable = pmd_pgtable(_pmd);
_pmd = mk_huge_pmd(new_page, vma); _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
/* /*
* spin_lock() below is not the equivalent of smp_wmb(), so * spin_lock() below is not the equivalent of smp_wmb(), so

File diff suppressed because it is too large


@ -3695,7 +3695,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
* but allow concurrent faults), and pte mapped but not yet locked. * but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked. * We return with mmap_sem still held, but pte unmapped and unlocked.
*/ */
int handle_pte_fault(struct mm_struct *mm, static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, struct vm_area_struct *vma, unsigned long address,
pte_t *pte, pmd_t *pmd, unsigned int flags) pte_t *pte, pmd_t *pmd, unsigned int flags)
{ {
@ -3754,22 +3754,14 @@ int handle_pte_fault(struct mm_struct *mm,
/* /*
* By the time we get here, we already hold the mm semaphore * By the time we get here, we already hold the mm semaphore
*/ */
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags) unsigned long address, unsigned int flags)
{ {
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
__set_current_state(TASK_RUNNING);
count_vm_event(PGFAULT);
mem_cgroup_count_vm_event(mm, PGFAULT);
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
if (unlikely(is_vm_hugetlb_page(vma))) if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, flags); return hugetlb_fault(mm, vma, address, flags);
@ -3782,9 +3774,12 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pmd) if (!pmd)
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) { if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
int ret = VM_FAULT_FALLBACK;
if (!vma->vm_ops) if (!vma->vm_ops)
return do_huge_pmd_anonymous_page(mm, vma, address, ret = do_huge_pmd_anonymous_page(mm, vma, address,
pmd, flags); pmd, flags);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
} else { } else {
pmd_t orig_pmd = *pmd; pmd_t orig_pmd = *pmd;
int ret; int ret;
@ -3850,6 +3845,37 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags); return handle_pte_fault(mm, vma, address, pte, pmd, flags);
} }
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
int ret;
__set_current_state(TASK_RUNNING);
count_vm_event(PGFAULT);
mem_cgroup_count_vm_event(mm, PGFAULT);
/* do counter updates before entering really critical section. */
check_sync_rss_stat(current);
/*
* Enable the memcg OOM handling for faults triggered in user
* space. Kernel faults are handled more gracefully.
*/
if (flags & FAULT_FLAG_USER)
mem_cgroup_enable_oom();
ret = __handle_mm_fault(mm, vma, address, flags);
if (flags & FAULT_FLAG_USER)
mem_cgroup_disable_oom();
if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
mem_cgroup_oom_synchronize();
return ret;
}
#ifndef __PAGETABLE_PUD_FOLDED #ifndef __PAGETABLE_PUD_FOLDED
/* /*
* Allocate page upper directory. * Allocate page upper directory.


@ -678,9 +678,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
*/ */
void pagefault_out_of_memory(void) void pagefault_out_of_memory(void)
{ {
struct zonelist *zonelist = node_zonelist(first_online_node, struct zonelist *zonelist;
GFP_KERNEL);
if (mem_cgroup_oom_synchronize())
return;
zonelist = node_zonelist(first_online_node, GFP_KERNEL);
if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) { if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
out_of_memory(NULL, 0, 0, NULL, false); out_of_memory(NULL, 0, 0, NULL, false);
clear_zonelist_oom(zonelist, GFP_KERNEL); clear_zonelist_oom(zonelist, GFP_KERNEL);


@ -2143,11 +2143,17 @@ EXPORT_SYMBOL(account_page_dirtied);
/* /*
* Helper function for set_page_writeback family. * Helper function for set_page_writeback family.
*
* The caller must hold mem_cgroup_begin/end_update_page_stat() lock
* while calling this function.
* See test_set_page_writeback for example.
*
* NOTE: Unlike account_page_dirtied this does not rely on being atomic * NOTE: Unlike account_page_dirtied this does not rely on being atomic
* wrt interrupts. * wrt interrupts.
*/ */
void account_page_writeback(struct page *page) void account_page_writeback(struct page *page)
{ {
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_WRITEBACK);
} }
EXPORT_SYMBOL(account_page_writeback); EXPORT_SYMBOL(account_page_writeback);
@ -2364,7 +2370,10 @@ int test_clear_page_writeback(struct page *page)
{ {
struct address_space *mapping = page_mapping(page); struct address_space *mapping = page_mapping(page);
int ret; int ret;
bool locked;
unsigned long memcg_flags;
mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
if (mapping) { if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info; struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags; unsigned long flags;
@ -2385,9 +2394,11 @@ int test_clear_page_writeback(struct page *page)
ret = TestClearPageWriteback(page); ret = TestClearPageWriteback(page);
} }
if (ret) { if (ret) {
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
dec_zone_page_state(page, NR_WRITEBACK); dec_zone_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_WRITTEN); inc_zone_page_state(page, NR_WRITTEN);
} }
mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
return ret; return ret;
} }
@ -2395,7 +2406,10 @@ int test_set_page_writeback(struct page *page)
{ {
struct address_space *mapping = page_mapping(page); struct address_space *mapping = page_mapping(page);
int ret; int ret;
bool locked;
unsigned long memcg_flags;
mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
if (mapping) { if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info; struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags; unsigned long flags;
@ -2422,6 +2436,7 @@ int test_set_page_writeback(struct page *page)
} }
if (!ret) if (!ret)
account_page_writeback(page); account_page_writeback(page);
mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
return ret; return ret;
} }
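
The per-memcg writeback counter follows the page-stat locking protocol spelled out in the comment added above: bracket the update with mem_cgroup_begin/end_update_page_stat(). A simplified sketch; the mapping and tree_lock handling of the real functions is omitted and the helper name is illustrative:

    /* Sketch: account one page entering writeback for both zone and memcg. */
    static void example_account_writeback_start(struct page *page)
    {
        bool locked;
        unsigned long flags;

        mem_cgroup_begin_update_page_stat(page, &locked, &flags);
        mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
        inc_zone_page_state(page, NR_WRITEBACK);
        mem_cgroup_end_update_page_stat(page, &locked, &flags);
    }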


@ -1052,11 +1052,11 @@ void do_page_add_anon_rmap(struct page *page,
{ {
int first = atomic_inc_and_test(&page->_mapcount); int first = atomic_inc_and_test(&page->_mapcount);
if (first) { if (first) {
if (!PageTransHuge(page)) if (PageTransHuge(page))
__inc_zone_page_state(page, NR_ANON_PAGES);
else
__inc_zone_page_state(page, __inc_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES); NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
} }
if (unlikely(PageKsm(page))) if (unlikely(PageKsm(page)))
return; return;
@ -1085,10 +1085,10 @@ void page_add_new_anon_rmap(struct page *page,
VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
SetPageSwapBacked(page); SetPageSwapBacked(page);
atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */ atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
if (!PageTransHuge(page)) if (PageTransHuge(page))
__inc_zone_page_state(page, NR_ANON_PAGES);
else
__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
hpage_nr_pages(page));
__page_set_anon_rmap(page, vma, address, 1); __page_set_anon_rmap(page, vma, address, 1);
if (!mlocked_vma_newpage(vma, page)) { if (!mlocked_vma_newpage(vma, page)) {
SetPageActive(page); SetPageActive(page);
@ -1111,7 +1111,7 @@ void page_add_file_rmap(struct page *page)
mem_cgroup_begin_update_page_stat(page, &locked, &flags); mem_cgroup_begin_update_page_stat(page, &locked, &flags);
if (atomic_inc_and_test(&page->_mapcount)) { if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED); __inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED); mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
} }
mem_cgroup_end_update_page_stat(page, &locked, &flags); mem_cgroup_end_update_page_stat(page, &locked, &flags);
} }
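
The anon-rmap hunks on either side of this point switch NR_ANON_PAGES to base-page units: a THP now contributes hpage_nr_pages() (HPAGE_PMD_NR) at map time and the same amount at unmap time, which is why /proc/meminfo above no longer adds the THP counter separately. A small sketch of the shared accounting step; example_account_anon() is illustrative:

    /* Sketch: anon accounting now scales with the size of the mapping. */
    static void example_account_anon(struct page *page, int sign)
    {
        /* hpage_nr_pages() is HPAGE_PMD_NR for a THP, 1 for a small page */
        __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
                              sign * hpage_nr_pages(page));
    }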
@ -1148,14 +1148,14 @@ void page_remove_rmap(struct page *page)
goto out; goto out;
if (anon) { if (anon) {
mem_cgroup_uncharge_page(page); mem_cgroup_uncharge_page(page);
if (!PageTransHuge(page)) if (PageTransHuge(page))
__dec_zone_page_state(page, NR_ANON_PAGES);
else
__dec_zone_page_state(page, __dec_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES); NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-hpage_nr_pages(page));
} else { } else {
__dec_zone_page_state(page, NR_FILE_MAPPED); __dec_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED); mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
mem_cgroup_end_update_page_stat(page, &locked, &flags); mem_cgroup_end_update_page_stat(page, &locked, &flags);
} }
if (unlikely(PageMlocked(page))) if (unlikely(PageMlocked(page)))


@ -432,6 +432,11 @@ static void activate_page_drain(int cpu)
pagevec_lru_move_fn(pvec, __activate_page, NULL); pagevec_lru_move_fn(pvec, __activate_page, NULL);
} }
static bool need_activate_page_drain(int cpu)
{
return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
}
void activate_page(struct page *page) void activate_page(struct page *page)
{ {
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
@ -449,6 +454,11 @@ static inline void activate_page_drain(int cpu)
{ {
} }
static bool need_activate_page_drain(int cpu)
{
return false;
}
void activate_page(struct page *page) void activate_page(struct page *page)
{ {
struct zone *zone = page_zone(page); struct zone *zone = page_zone(page);
@ -701,12 +711,36 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
lru_add_drain(); lru_add_drain();
} }
/* static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
* Returns 0 for success
*/ void lru_add_drain_all(void)
int lru_add_drain_all(void)
{ {
return schedule_on_each_cpu(lru_add_drain_per_cpu); static DEFINE_MUTEX(lock);
static struct cpumask has_work;
int cpu;
mutex_lock(&lock);
get_online_cpus();
cpumask_clear(&has_work);
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
schedule_work_on(cpu, work);
cpumask_set_cpu(cpu, &has_work);
}
}
for_each_cpu(cpu, &has_work)
flush_work(&per_cpu(lru_add_drain_work, cpu));
put_online_cpus();
mutex_unlock(&lock);
} }
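
lru_add_drain_all() now queues drain work only on CPUs whose pagevecs are non-empty and no longer returns an error, so callers drop their return-value handling. A minimal caller-side sketch; example_prepare_isolation() is illustrative:

    /* Sketch: callers treat the drain as best-effort and unconditional. */
    static void example_prepare_isolation(void)
    {
        lru_add_drain_all();    /* flush per-cpu pagevecs onto the LRU lists */
        /* ... proceed with page isolation / migration ... */
    }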
/* /*


@ -567,7 +567,6 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
/** /**
* truncate_pagecache - unmap and remove pagecache that has been truncated * truncate_pagecache - unmap and remove pagecache that has been truncated
* @inode: inode * @inode: inode
* @oldsize: old file size
* @newsize: new file size * @newsize: new file size
* *
* inode's new i_size must already be written before truncate_pagecache * inode's new i_size must already be written before truncate_pagecache
@ -580,7 +579,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
* situations such as writepage being called for a page that has already * situations such as writepage being called for a page that has already
* had its underlying blocks deallocated. * had its underlying blocks deallocated.
*/ */
void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize) void truncate_pagecache(struct inode *inode, loff_t newsize)
{ {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
loff_t holebegin = round_up(newsize, PAGE_SIZE); loff_t holebegin = round_up(newsize, PAGE_SIZE);
@ -614,12 +613,8 @@ EXPORT_SYMBOL(truncate_pagecache);
*/ */
void truncate_setsize(struct inode *inode, loff_t newsize) void truncate_setsize(struct inode *inode, loff_t newsize)
{ {
loff_t oldsize;
oldsize = inode->i_size;
i_size_write(inode, newsize); i_size_write(inode, newsize);
truncate_pagecache(inode, newsize);
truncate_pagecache(inode, oldsize, newsize);
} }
EXPORT_SYMBOL(truncate_setsize); EXPORT_SYMBOL(truncate_setsize);


@ -139,11 +139,23 @@ static bool global_reclaim(struct scan_control *sc)
{ {
return !sc->target_mem_cgroup; return !sc->target_mem_cgroup;
} }
static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
{
struct mem_cgroup *root = sc->target_mem_cgroup;
return !mem_cgroup_disabled() &&
mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
}
#else #else
static bool global_reclaim(struct scan_control *sc) static bool global_reclaim(struct scan_control *sc)
{ {
return true; return true;
} }
static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
{
return false;
}
#endif #endif
unsigned long zone_reclaimable_pages(struct zone *zone) unsigned long zone_reclaimable_pages(struct zone *zone)
@ -2164,9 +2176,11 @@ static inline bool should_continue_reclaim(struct zone *zone,
} }
} }
static void shrink_zone(struct zone *zone, struct scan_control *sc) static int
__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
{ {
unsigned long nr_reclaimed, nr_scanned; unsigned long nr_reclaimed, nr_scanned;
int groups_scanned = 0;
do { do {
struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup *root = sc->target_mem_cgroup;
@ -2174,15 +2188,17 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
.zone = zone, .zone = zone,
.priority = sc->priority, .priority = sc->priority,
}; };
struct mem_cgroup *memcg; struct mem_cgroup *memcg = NULL;
mem_cgroup_iter_filter filter = (soft_reclaim) ?
mem_cgroup_soft_reclaim_eligible : NULL;
nr_reclaimed = sc->nr_reclaimed; nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned; nr_scanned = sc->nr_scanned;
memcg = mem_cgroup_iter(root, NULL, &reclaim); while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
do {
struct lruvec *lruvec; struct lruvec *lruvec;
groups_scanned++;
lruvec = mem_cgroup_zone_lruvec(zone, memcg); lruvec = mem_cgroup_zone_lruvec(zone, memcg);
shrink_lruvec(lruvec, sc); shrink_lruvec(lruvec, sc);
@ -2202,8 +2218,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
mem_cgroup_iter_break(root, memcg); mem_cgroup_iter_break(root, memcg);
break; break;
} }
memcg = mem_cgroup_iter(root, memcg, &reclaim); }
} while (memcg);
vmpressure(sc->gfp_mask, sc->target_mem_cgroup, vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
sc->nr_scanned - nr_scanned, sc->nr_scanned - nr_scanned,
@ -2211,6 +2226,37 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc)); sc->nr_scanned - nr_scanned, sc));
return groups_scanned;
}
static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
unsigned long nr_scanned = sc->nr_scanned;
int scanned_groups;
scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
/*
* memcg iterator might race with other reclaimer or start from
* a incomplete tree walk so the tree walk in __shrink_zone
* might have missed groups that are above the soft limit. Try
* another loop to catch up with others. Do it just once to
* prevent from reclaim latencies when other reclaimers always
* preempt this one.
*/
if (do_soft_reclaim && !scanned_groups)
__shrink_zone(zone, sc, do_soft_reclaim);
/*
* No group is over the soft limit or those that are do not have
* pages in the zone we are reclaiming so we have to reclaim everybody
*/
if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
__shrink_zone(zone, sc, false);
return;
}
} }
/* Returns true if compaction should go ahead for a high-order request */ /* Returns true if compaction should go ahead for a high-order request */
@ -2274,8 +2320,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{ {
struct zoneref *z; struct zoneref *z;
struct zone *zone; struct zone *zone;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
bool aborted_reclaim = false; bool aborted_reclaim = false;
/* /*
@ -2315,18 +2359,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
continue; continue;
} }
} }
/*
* This steals pages from memory cgroups over softlimit
* and returns the number of reclaimed pages and
* scanned pages. This works for global memory pressure
* and balancing, not for a memcg's limit.
*/
nr_soft_scanned = 0;
nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
sc->order, sc->gfp_mask,
&nr_soft_scanned);
sc->nr_reclaimed += nr_soft_reclaimed;
sc->nr_scanned += nr_soft_scanned;
/* need some check for avoid more shrink_zone() */ /* need some check for avoid more shrink_zone() */
} }
@ -2920,8 +2952,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
{ {
int i; int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
struct scan_control sc = { struct scan_control sc = {
.gfp_mask = GFP_KERNEL, .gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY, .priority = DEF_PRIORITY,
@ -3036,15 +3066,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
sc.nr_scanned = 0; sc.nr_scanned = 0;
nr_soft_scanned = 0;
/*
* Call soft limit reclaim before calling shrink_zone.
*/
nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
order, sc.gfp_mask,
&nr_soft_scanned);
sc.nr_reclaimed += nr_soft_reclaimed;
/* /*
* There should be no need to raise the scanning * There should be no need to raise the scanning
* priority if enough pages are already being scanned * priority if enough pages are already being scanned


@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
if (!cg_proto) if (!cg_proto)
return -EINVAL; return -EINVAL;
if (val > RESOURCE_MAX) if (val > RES_COUNTER_MAX)
val = RESOURCE_MAX; val = RES_COUNTER_MAX;
tcp = tcp_from_cgproto(cg_proto); tcp = tcp_from_cgproto(cg_proto);
@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
net->ipv4.sysctl_tcp_mem[i]); net->ipv4.sysctl_tcp_mem[i]);
if (val == RESOURCE_MAX) if (val == RES_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
else if (val != RESOURCE_MAX) { else if (val != RES_COUNTER_MAX) {
/* /*
* The active bit needs to be written after the static_key * The active bit needs to be written after the static_key
* update. This is what guarantees that the socket activation * update. This is what guarantees that the socket activation
@ -187,7 +187,7 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
switch (cft->private) { switch (cft->private) {
case RES_LIMIT: case RES_LIMIT:
val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
break; break;
case RES_USAGE: case RES_USAGE:
val = tcp_read_usage(memcg); val = tcp_read_usage(memcg);