Merge branch 'akpm' (fixes from Andrew Morton)

Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  mm: revert mremap pud_free anti-fix
  mm: fix BUG in __split_huge_page_pmd
  swap: fix set_blocksize race during swapon/swapoff
  procfs: call default get_unmapped_area on MMU-present architectures
  procfs: fix unintended truncation of returned mapped address
  writeback: fix negative bdi max pause
  percpu_refcount: export symbols
  fs: buffer: move allocation failure loop into the allocator
  mm: memcg: handle non-error OOM situations more gracefully
  tools/testing/selftests: fix uninitialized variable
  block/partitions/efi.c: treat size mismatch as a warning, not an error
  mm: hugetlb: initialize PG_reserved for tail pages of gigantic compound pages
  mm/zswap: bugfix: memory leak when re-swapon
  mm: /proc/pid/pagemap: inspect _PAGE_SOFT_DIRTY only on present pages
  mm: migration: do not lose soft dirty bit if page is in migration state
  gcov: MAINTAINERS: Add an entry for gcov
  mm/hugetlb.c: correct missing private flag clearing
  mm/vmscan.c: don't forget to free shrinker->nr_deferred
  ipc/sem.c: synchronize semop and semctl with IPC_RMID
  ipc: update locking scheme comments
  ...
commit 056cdce0d3
MAINTAINERS
@@ -3624,6 +3624,12 @@ L: linux-scsi@vger.kernel.org
 S: Odd Fixes (e.g., new signatures)
 F: drivers/scsi/fdomain.*
 
+GCOV BASED KERNEL PROFILING
+M: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+S: Maintained
+F: kernel/gcov/
+F: Documentation/gcov.txt
+
 GDT SCSI DISK ARRAY CONTROLLER DRIVER
 M: Achim Leubner <achim_leubner@adaptec.com>
 L: linux-scsi@vger.kernel.org

block/partitions/efi.c
@@ -222,11 +222,16 @@ static int is_pmbr_valid(legacy_mbr *mbr, sector_t total_sectors)
 	 * the disk size.
 	 *
 	 * Hybrid MBRs do not necessarily comply with this.
+	 *
+	 * Consider a bad value here to be a warning to support dd'ing
+	 * an image from a smaller disk to a larger disk.
 	 */
 	if (ret == GPT_MBR_PROTECTIVE) {
 		sz = le32_to_cpu(mbr->partition_record[part].size_in_lba);
 		if (sz != (uint32_t) total_sectors - 1 && sz != 0xFFFFFFFF)
-			ret = 0;
+			pr_debug("GPT: mbr size in lba (%u) different than whole disk (%u).\n",
+				 sz, min_t(uint32_t,
					   total_sectors - 1, 0xFFFFFFFF));
 	}
 done:
 	return ret;

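The comment added above explains why a protective-MBR size mismatch is now only reported instead of invalidating the GPT: an image dd'd from a smaller disk onto a larger one keeps the old size_in_lba. The snippet below is a minimal standalone userspace sketch of that check, not kernel code; the helper name pmbr_size_matches is invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* A protective MBR record is expected to cover either the whole disk
 * minus the MBR sector, or the 32-bit LBA maximum for disks > 2 TiB.
 * A mismatch is reported but no longer treated as fatal. */
static int pmbr_size_matches(uint32_t size_in_lba, uint64_t total_sectors)
{
        uint32_t expected = (total_sectors - 1 > 0xFFFFFFFFULL)
                                ? 0xFFFFFFFFU : (uint32_t)(total_sectors - 1);

        if (size_in_lba == expected || size_in_lba == 0xFFFFFFFFU)
                return 1;

        fprintf(stderr,
                "GPT: mbr size in lba (%u) different than whole disk (%u).\n",
                size_in_lba, expected);
        return 0;       /* caller treats this as a warning only */
}

int main(void)
{
        /* e.g. an image dd'd from a 1 GiB disk onto a 2 GiB disk */
        uint64_t total_sectors = 4194304;       /* 2 GiB of 512-byte sectors */
        uint32_t size_in_lba   = 2097152 - 1;   /* from the smaller source disk */

        pmbr_size_matches(size_in_lba, total_sectors);
        return 0;
}
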
fs/buffer.c (14 lines changed)
@@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	struct buffer_head *bh;
 	sector_t end_block;
 	int ret = 0;		/* Will call free_more_memory() */
+	gfp_t gfp_mask;
 
-	page = find_or_create_page(inode->i_mapping, index,
-		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
+	gfp_mask |= __GFP_MOVABLE;
+	/*
+	 * XXX: __getblk_slow() can not really deal with failure and
+	 * will endlessly loop on improvised global reclaim. Prefer
+	 * looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp_mask |= __GFP_NOFAIL;
+
+	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
 	if (!page)
 		return ret;
 

fs/proc/inode.c
@@ -288,10 +288,14 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
-	int rv = -EIO;
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long rv = -EIO;
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL;
 	if (use_pde(pde)) {
-		get_unmapped_area = pde->proc_fops->get_unmapped_area;
+#ifdef CONFIG_MMU
+		get_unmapped_area = current->mm->get_unmapped_area;
+#endif
+		if (pde->proc_fops->get_unmapped_area)
+			get_unmapped_area = pde->proc_fops->get_unmapped_area;
 		if (get_unmapped_area)
 			rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
 		unuse_pde(pde);

fs/proc/task_mmu.c
@@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		frame = pte_pfn(pte);
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
+		if (pte_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		swp_entry_t entry;
 		if (pte_swp_soft_dirty(pte))
@@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
-	if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
+	if ((vma->vm_flags & VM_SOFTDIRTY))
 		flags2 |= __PM_SOFT_DIRTY;
 
 	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);

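For reference, the soft-dirty state that pte_to_pagemap_entry() exports here can be observed from userspace through /proc/pid/pagemap. The program below is a small sketch of such a reader, not part of the patch; the bit positions (63 present, 62 swapped, 55 soft-dirty) follow Documentation/vm/pagemap.txt, and the helper name pagemap_entry() is invented for the example.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Bits of a 64-bit pagemap entry, per Documentation/vm/pagemap.txt. */
#define PM_PRESENT      (1ULL << 63)
#define PM_SWAP         (1ULL << 62)
#define PM_SOFT_DIRTY   (1ULL << 55)

static uint64_t pagemap_entry(void *addr)
{
        long psize = sysconf(_SC_PAGESIZE);
        uint64_t entry = 0;
        off_t off = (uintptr_t)addr / psize * sizeof(entry);
        int fd = open("/proc/self/pagemap", O_RDONLY);

        if (fd < 0 || pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
                perror("pagemap");
                exit(1);
        }
        close(fd);
        return entry;
}

int main(void)
{
        char *buf = malloc(4096);

        buf[0] = 1;             /* fault the page in and dirty it */
        uint64_t e = pagemap_entry(buf);
        printf("present=%d swapped=%d soft-dirty=%d\n",
               !!(e & PM_PRESENT), !!(e & PM_SWAP), !!(e & PM_SOFT_DIRTY));
        return 0;
}
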
include/linux/memcontrol.h
@@ -137,47 +137,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-/**
- * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
- * @new: true to enable, false to disable
- *
- * Toggle whether a failed memcg charge should invoke the OOM killer
- * or just return -ENOMEM. Returns the previous toggle state.
- *
- * NOTE: Any path that enables the OOM killer before charging must
- *       call mem_cgroup_oom_synchronize() afterward to finalize the
- *       OOM handling and clean up.
- */
-static inline bool mem_cgroup_toggle_oom(bool new)
+static inline void mem_cgroup_oom_enable(void)
 {
-	bool old;
-
-	old = current->memcg_oom.may_oom;
-	current->memcg_oom.may_oom = new;
-
-	return old;
+	WARN_ON(current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 1;
 }
 
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
-	bool old = mem_cgroup_toggle_oom(true);
-
-	WARN_ON(old == true);
-}
-
-static inline void mem_cgroup_disable_oom(void)
-{
-	bool old = mem_cgroup_toggle_oom(false);
-
-	WARN_ON(old == false);
+	WARN_ON(!current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
 {
-	return p->memcg_oom.in_memcg_oom;
+	return p->memcg_oom.memcg;
 }
 
-bool mem_cgroup_oom_synchronize(void);
+bool mem_cgroup_oom_synchronize(bool wait);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -402,16 +379,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
-static inline bool mem_cgroup_toggle_oom(bool new)
-{
-	return false;
-}
-
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_enable(void)
 {
 }
 
-static inline void mem_cgroup_disable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
 }
 
@@ -420,7 +392,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 	return false;
 }
 
-static inline bool mem_cgroup_oom_synchronize(void)
+static inline bool mem_cgroup_oom_synchronize(bool wait)
 {
 	return false;
 }

include/linux/sched.h
@@ -1394,11 +1394,10 @@ struct task_struct {
 	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
+		struct mem_cgroup *memcg;
+		gfp_t gfp_mask;
+		int order;
 		unsigned int may_oom:1;
-		unsigned int in_memcg_oom:1;
-		unsigned int oom_locked:1;
-		int wakeups;
-		struct mem_cgroup *wait_on_memcg;
 	} memcg_oom;
 #endif
 #ifdef CONFIG_UPROBES

ipc/sem.c (42 lines changed)
@@ -1282,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1336,12 +1342,14 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
@@ -1354,10 +1362,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1375,8 +1381,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
@@ -1404,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1431,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1836,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1843,8 +1855,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
@@ -2057,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (sma->sem_perm.deleted) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created

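The -EIDRM checks added throughout ipc/sem.c close races between semop()/semctl() and IPC_RMID. The userspace program below is a rough demonstration of the scenario, not part of the patch: one process removes the semaphore set while another is blocked in semop(), which then fails with EIDRM.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };

        if (semid < 0) {
                perror("semget");
                return 1;
        }

        if (fork() == 0) {
                /* child: remove the set while the parent sleeps in semop() */
                sleep(1);
                semctl(semid, 0, IPC_RMID);
                _exit(0);
        }

        /* blocks because the semaphore value is 0, then fails with EIDRM */
        if (semop(semid, &op, 1) < 0)
                printf("semop failed: %s\n", strerror(errno));

        wait(NULL);
        return 0;
}
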
ipc/util.c (27 lines changed)
@@ -17,12 +17,27 @@
  *   Pavel Emelianov <xemul@openvz.org>
  *
  * General sysv ipc locking scheme:
- *	when doing ipc id lookups, take the ids->rwsem
- *	rcu_read_lock()
- *	    obtain the ipc object (kern_ipc_perm)
- *	    perform security, capabilities, auditing and permission checks, etc.
- *	    acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object()
- *	    perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
+ *	rcu_read_lock()
+ *          obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
+ *	    tree.
+ *	    - perform initial checks (capabilities, auditing and permission,
+ *	      etc).
+ *	    - perform read-only operations, such as STAT, INFO commands.
+ *	      acquire the ipc lock (kern_ipc_perm.lock) through
+ *	      ipc_lock_object()
+ *		- perform data updates, such as SET, RMID commands and
+ *		  mechanism-specific operations (semop/semtimedop,
+ *		  msgsnd/msgrcv, shmat/shmdt).
+ *	    drop the ipc lock, through ipc_unlock_object().
+ *	rcu_read_unlock()
+ *
+ *  The ids->rwsem must be taken when:
+ *	- creating, removing and iterating the existing entries in ipc
+ *	  identifier sets.
+ *	- iterating through files under /proc/sysvipc/
+ *
+ *  Note that sems have a special fast path that avoids kern_ipc_perm.lock -
+ *  see sem_lock().
  */
 
 #include <linux/mm.h>

lib/percpu-refcount.c
@@ -53,6 +53,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
 	ref->release = release;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(percpu_ref_init);
 
 /**
  * percpu_ref_cancel_init - cancel percpu_ref_init()
@@ -84,6 +85,7 @@ void percpu_ref_cancel_init(struct percpu_ref *ref)
 		free_percpu(ref->pcpu_count);
 	}
 }
+EXPORT_SYMBOL_GPL(percpu_ref_cancel_init);
 
 static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 {
@@ -156,3 +158,4 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);

mm/filemap.c (11 lines changed)
@@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
-	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already? Either
-	 * way, try readahead, but disable the memcg OOM killer for it
-	 * as readahead is optional and no errors are propagated up
-	 * the fault stack. The OOM killer is enabled while trying to
-	 * instantiate the faulting page individually below.
+	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;

mm/huge_memory.c
@@ -2697,6 +2697,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 
 	mmun_start = haddr;
 	mmun_end = haddr + HPAGE_PMD_SIZE;
+again:
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_trans_huge(*pmd))) {
@@ -2719,7 +2720,14 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 	split_huge_page(page);
 
 	put_page(page);
-	BUG_ON(pmd_trans_huge(*pmd));
+
+	/*
+	 * We don't always have down_write of mmap_sem here: a racing
+	 * do_huge_pmd_wp_page() might have copied-on-write to another
+	 * huge page before our split_huge_page() got the anon_vma lock.
+	 */
+	if (unlikely(pmd_trans_huge(*pmd)))
+		goto again;
 }
 
 void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,

mm/hugetlb.c (17 lines changed)
@@ -653,6 +653,7 @@ static void free_huge_page(struct page *page)
 	BUG_ON(page_count(page));
 	BUG_ON(page_mapcount(page));
 	restore_reserve = PagePrivate(page);
+	ClearPagePrivate(page);
 
 	spin_lock(&hugetlb_lock);
 	hugetlb_cgroup_uncharge_page(hstate_index(h),
@@ -695,8 +696,22 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 	/* we rely on prep_new_huge_page to set the destructor */
 	set_compound_order(page, order);
 	__SetPageHead(page);
+	__ClearPageReserved(page);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
+		/*
+		 * For gigantic hugepages allocated through bootmem at
+		 * boot, it's safer to be consistent with the not-gigantic
+		 * hugepages and clear the PG_reserved bit from all tail pages
+		 * too. Otherwse drivers using get_user_pages() to access tail
+		 * pages may get the reference counting wrong if they see
+		 * PG_reserved set on a tail page (despite the head page not
+		 * having PG_reserved set). Enforcing this consistency between
+		 * head and tail pages allows drivers to optimize away a check
+		 * on the head page when they need know if put_page() is needed
+		 * after get_user_pages().
+		 */
+		__ClearPageReserved(p);
 		set_page_count(p, 0);
 		p->first_page = page;
 	}
@@ -1329,9 +1344,9 @@ static void __init gather_bootmem_prealloc(void)
 #else
 		page = virt_to_page(m);
 #endif
-		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
+		WARN_ON(PageReserved(page));
 		prep_new_huge_page(h, page, page_to_nid(page));
 		/*
 		 * If we had gigantic hugepages allocated at boot time, we need

mm/memcontrol.c (177 lines changed)
@@ -866,6 +866,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	unsigned long val = 0;
 	int cpu;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		val += per_cpu(memcg->stat->events[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
@@ -873,6 +874,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	val += memcg->nocpu_base.events[idx];
 	spin_unlock(&memcg->pcp_counter_lock);
 #endif
+	put_online_cpus();
 	return val;
 }
 
@@ -2159,27 +2161,67 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 	memcg_wakeup_oom(memcg);
 }
 
-/*
- * try to call OOM killer
- */
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	bool locked;
-	int wakeups;
-
 	if (!current->memcg_oom.may_oom)
 		return;
-
-	current->memcg_oom.in_memcg_oom = 1;
-
 	/*
-	 * As with any blocking lock, a contender needs to start
-	 * listening for wakeups before attempting the trylock,
-	 * otherwise it can miss the wakeup from the unlock and sleep
-	 * indefinitely. This is just open-coded because our locking
-	 * is so particular to memcg hierarchies.
+	 * We are in the middle of the charge context here, so we
+	 * don't want to block when potentially sitting on a callstack
+	 * that holds all kinds of filesystem and mm locks.
+	 *
+	 * Also, the caller may handle a failed allocation gracefully
+	 * (like optional page cache readahead) and so an OOM killer
+	 * invocation might not even be necessary.
+	 *
+	 * That's why we don't do anything here except remember the
+	 * OOM context and then deal with it at the end of the page
+	 * fault when the stack is unwound, the locks are released,
+	 * and when we know whether the fault was overall successful.
 	 */
-	wakeups = atomic_read(&memcg->oom_wakeups);
+	css_get(&memcg->css);
+	current->memcg_oom.memcg = memcg;
+	current->memcg_oom.gfp_mask = mask;
+	current->memcg_oom.order = order;
+}
+
+/**
+ * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
+ *
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
+ *
+ * Memcg supports userspace OOM handling where failed allocations must
+ * sleep on a waitqueue until the userspace task resolves the
+ * situation. Sleeping directly in the charge context with all kinds
+ * of locks held is not a good idea, instead we remember an OOM state
+ * in the task and mem_cgroup_oom_synchronize() has to be called at
+ * the end of the page fault to complete the OOM handling.
+ *
+ * Returns %true if an ongoing memcg OOM situation was detected and
+ * completed, %false otherwise.
+ */
+bool mem_cgroup_oom_synchronize(bool handle)
+{
+	struct mem_cgroup *memcg = current->memcg_oom.memcg;
+	struct oom_wait_info owait;
+	bool locked;
+
+	/* OOM is global, do not handle */
+	if (!memcg)
+		return false;
+
+	if (!handle)
+		goto cleanup;
+
+	owait.memcg = memcg;
+	owait.wait.flags = 0;
+	owait.wait.func = memcg_oom_wake_function;
+	owait.wait.private = current;
+	INIT_LIST_HEAD(&owait.wait.task_list);
+
+	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
 	mem_cgroup_mark_under_oom(memcg);
 
 	locked = mem_cgroup_oom_trylock(memcg);
@@ -2189,95 +2231,16 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 
 	if (locked && !memcg->oom_kill_disable) {
 		mem_cgroup_unmark_under_oom(memcg);
-		mem_cgroup_out_of_memory(memcg, mask, order);
-		mem_cgroup_oom_unlock(memcg);
-		/*
-		 * There is no guarantee that an OOM-lock contender
-		 * sees the wakeups triggered by the OOM kill
-		 * uncharges. Wake any sleepers explicitely.
-		 */
-		memcg_oom_recover(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+		mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
					 current->memcg_oom.order);
 	} else {
-		/*
-		 * A system call can just return -ENOMEM, but if this
-		 * is a page fault and somebody else is handling the
-		 * OOM already, we need to sleep on the OOM waitqueue
-		 * for this memcg until the situation is resolved.
-		 * Which can take some time because it might be
-		 * handled by a userspace task.
-		 *
-		 * However, this is the charge context, which means
-		 * that we may sit on a large call stack and hold
-		 * various filesystem locks, the mmap_sem etc. and we
-		 * don't want the OOM handler to deadlock on them
-		 * while we sit here and wait. Store the current OOM
-		 * context in the task_struct, then return -ENOMEM.
-		 * At the end of the page fault handler, with the
-		 * stack unwound, pagefault_out_of_memory() will check
-		 * back with us by calling
-		 * mem_cgroup_oom_synchronize(), possibly putting the
-		 * task to sleep.
-		 */
-		current->memcg_oom.oom_locked = locked;
-		current->memcg_oom.wakeups = wakeups;
-		css_get(&memcg->css);
-		current->memcg_oom.wait_on_memcg = memcg;
-	}
-}
-
-/**
- * mem_cgroup_oom_synchronize - complete memcg OOM handling
- *
- * This has to be called at the end of a page fault if the the memcg
- * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
- *
- * Memcg supports userspace OOM handling, so failed allocations must
- * sleep on a waitqueue until the userspace task resolves the
- * situation. Sleeping directly in the charge context with all kinds
- * of locks held is not a good idea, instead we remember an OOM state
- * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to put the task to sleep and clean up the
- * OOM state.
- *
- * Returns %true if an ongoing memcg OOM situation was detected and
- * finalized, %false otherwise.
- */
-bool mem_cgroup_oom_synchronize(void)
-{
-	struct oom_wait_info owait;
-	struct mem_cgroup *memcg;
-
-	/* OOM is global, do not handle */
-	if (!current->memcg_oom.in_memcg_oom)
-		return false;
-
-	/*
-	 * We invoked the OOM killer but there is a chance that a kill
-	 * did not free up any charges. Everybody else might already
-	 * be sleeping, so restart the fault and keep the rampage
-	 * going until some charges are released.
-	 */
-	memcg = current->memcg_oom.wait_on_memcg;
-	if (!memcg)
-		goto out;
-
-	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-		goto out_memcg;
-
-	owait.memcg = memcg;
-	owait.wait.flags = 0;
-	owait.wait.func = memcg_oom_wake_function;
-	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
-
-	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	/* Only sleep if we didn't miss any wakeups since OOM */
-	if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
 		schedule();
-	finish_wait(&memcg_oom_waitq, &owait.wait);
-out_memcg:
-	mem_cgroup_unmark_under_oom(memcg);
-	if (current->memcg_oom.oom_locked) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+	}
+
+	if (locked) {
 		mem_cgroup_oom_unlock(memcg);
 		/*
 		 * There is no guarantee that an OOM-lock contender
@@ -2286,10 +2249,9 @@ bool mem_cgroup_oom_synchronize(void)
 		 */
 		memcg_oom_recover(memcg);
 	}
+cleanup:
+	current->memcg_oom.memcg = NULL;
 	css_put(&memcg->css);
-	current->memcg_oom.wait_on_memcg = NULL;
-out:
-	current->memcg_oom.in_memcg_oom = 0;
 	return true;
 }
 
@@ -2703,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		     || fatal_signal_pending(current)))
 		goto bypass;
 
+	if (unlikely(task_in_memcg_oom(current)))
+		goto bypass;
+
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
@@ -2801,6 +2766,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	return 0;
 nomem:
 	*ptr = NULL;
+	if (gfp_mask & __GFP_NOFAIL)
+		return 0;
 	return -ENOMEM;
 bypass:
 	*ptr = root_mem_cgroup;

mm/memory.c (20 lines changed)
@@ -837,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 */
 			make_migration_entry_read(&entry);
 			pte = swp_entry_to_pte(entry);
+			if (pte_swp_soft_dirty(*src_pte))
+				pte = pte_swp_mksoft_dirty(pte);
 			set_pte_at(src_mm, addr, src_pte, pte);
 		}
 	}
@@ -3863,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * space. Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_enable_oom();
+		mem_cgroup_oom_enable();
 
 	ret = __handle_mm_fault(mm, vma, address, flags);
 
-	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_disable_oom();
-
-	if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
-		mem_cgroup_oom_synchronize();
+	if (flags & FAULT_FLAG_USER) {
+		mem_cgroup_oom_disable();
+		/*
+		 * The task may have entered a memcg OOM situation but
+		 * if the allocation error was handled gracefully (no
+		 * VM_FAULT_OOM), there is no need to kill anything.
+		 * Just clean up the OOM state peacefully.
+		 */
+		if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+			mem_cgroup_oom_synchronize(false);
+	}
 
 	return ret;
 }

mm/migrate.c
@@ -161,6 +161,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
+	if (pte_swp_soft_dirty(*ptep))
+		pte = pte_mksoft_dirty(pte);
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
 #ifdef CONFIG_HUGETLB_PAGE

mm/mprotect.c
@@ -94,13 +94,16 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
+				pte_t newpte;
 				/*
 				 * A protection check is difficult so
 				 * just be safe and disable write
 				 */
 				make_migration_entry_read(&entry);
-				set_pte_at(mm, addr, pte,
-					swp_entry_to_pte(entry));
+				newpte = swp_entry_to_pte(entry);
+				if (pte_swp_soft_dirty(oldpte))
+					newpte = pte_swp_mksoft_dirty(newpte);
+				set_pte_at(mm, addr, pte, newpte);
 			}
 			pages++;
 		}

mm/mremap.c
@@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 
 #include "internal.h"
 
@@ -63,10 +62,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 		return NULL;
 
 	pmd = pmd_alloc(mm, pud, addr);
-	if (!pmd) {
-		pud_free(mm, pud);
+	if (!pmd)
 		return NULL;
-	}
 
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 

mm/oom_kill.c
@@ -680,7 +680,7 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
-	if (mem_cgroup_oom_synchronize())
+	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	zonelist = node_zonelist(first_online_node, GFP_KERNEL);

mm/page-writeback.c
@@ -1210,11 +1210,11 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
-static long bdi_max_pause(struct backing_dev_info *bdi,
+static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
			  unsigned long bdi_dirty)
 {
-	long bw = bdi->avg_write_bandwidth;
-	long t;
+	unsigned long bw = bdi->avg_write_bandwidth;
+	unsigned long t;
 
 	/*
 	 * Limit pause time for small memory systems. If sleeping for too long
@@ -1226,7 +1226,7 @@ static long bdi_max_pause(struct backing_dev_info *bdi,
 	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
 	t++;
 
-	return min_t(long, t, MAX_PAUSE);
+	return min_t(unsigned long, t, MAX_PAUSE);
 }
 
 static long bdi_min_pause(struct backing_dev_info *bdi,

mm/swapfile.c
@@ -1824,6 +1824,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct filename *pathname;
 	int i, type, prev;
 	int err;
+	unsigned int old_block_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1914,6 +1915,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	}
 
 	swap_file = p->swap_file;
+	old_block_size = p->old_block_size;
 	p->swap_file = NULL;
 	p->max = 0;
 	swap_map = p->swap_map;
@@ -1938,7 +1940,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
-		set_blocksize(bdev, p->old_block_size);
+		set_blocksize(bdev, old_block_size);
 		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 	} else {
 		mutex_lock(&inode->i_mutex);

mm/vmscan.c
@@ -211,6 +211,7 @@ void unregister_shrinker(struct shrinker *shrinker)
 	down_write(&shrinker_rwsem);
 	list_del(&shrinker->list);
 	up_write(&shrinker_rwsem);
+	kfree(shrinker->nr_deferred);
 }
 EXPORT_SYMBOL(unregister_shrinker);
 

mm/zswap.c
@@ -804,6 +804,10 @@ static void zswap_frontswap_invalidate_area(unsigned type)
 	}
 	tree->rbroot = RB_ROOT;
 	spin_unlock(&tree->lock);
+
+	zbud_destroy_pool(tree->pool);
+	kfree(tree);
+	zswap_trees[type] = NULL;
 }
 
 static struct zbud_ops zswap_zbud_ops = {

tools/testing/selftests/timers/posix_timers.c
@@ -151,7 +151,7 @@ static int check_timer_create(int which)
 	fflush(stdout);
 
 	done = 0;
-	timer_create(which, NULL, &id);
+	err = timer_create(which, NULL, &id);
 	if (err < 0) {
 		perror("Can't create timer\n");
 		return -1;

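The selftest fix above simply stores the return value of timer_create() before testing it, instead of checking an uninitialized err. As a standalone illustration (not taken from the kernel tree), a minimal caller that checks the result might look like the sketch below; link with -lrt on older glibc.

#include <stdio.h>
#include <time.h>

int main(void)
{
        timer_t id;

        /* timer_create() returns 0 on success, -1 with errno set on error */
        if (timer_create(CLOCK_MONOTONIC, NULL, &id) < 0) {
                perror("timer_create");
                return 1;
        }

        printf("timer created\n");
        timer_delete(id);
        return 0;
}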