Merge branch 'akpm' (fixes from Andrew Morton)

Merge misc fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  mm: revert mremap pud_free anti-fix
  mm: fix BUG in __split_huge_page_pmd
  swap: fix set_blocksize race during swapon/swapoff
  procfs: call default get_unmapped_area on MMU-present architectures
  procfs: fix unintended truncation of returned mapped address
  writeback: fix negative bdi max pause
  percpu_refcount: export symbols
  fs: buffer: move allocation failure loop into the allocator
  mm: memcg: handle non-error OOM situations more gracefully
  tools/testing/selftests: fix uninitialized variable
  block/partitions/efi.c: treat size mismatch as a warning, not an error
  mm: hugetlb: initialize PG_reserved for tail pages of gigantic compound pages
  mm/zswap: bugfix: memory leak when re-swapon
  mm: /proc/pid/pagemap: inspect _PAGE_SOFT_DIRTY only on present pages
  mm: migration: do not lose soft dirty bit if page is in migration state
  gcov: MAINTAINERS: Add an entry for gcov
  mm/hugetlb.c: correct missing private flag clearing
  mm/vmscan.c: don't forget to free shrinker->nr_deferred
  ipc/sem.c: synchronize semop and semctl with IPC_RMID
  ipc: update locking scheme comments
  ...
commit 056cdce0d3
MAINTAINERS
@@ -3624,6 +3624,12 @@ L: linux-scsi@vger.kernel.org
 S: Odd Fixes (e.g., new signatures)
 F: drivers/scsi/fdomain.*
 
+GCOV BASED KERNEL PROFILING
+M: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+S: Maintained
+F: kernel/gcov/
+F: Documentation/gcov.txt
+
 GDT SCSI DISK ARRAY CONTROLLER DRIVER
 M: Achim Leubner <achim_leubner@adaptec.com>
 L: linux-scsi@vger.kernel.org

block/partitions/efi.c
@@ -222,11 +222,16 @@ static int is_pmbr_valid(legacy_mbr *mbr, sector_t total_sectors)
 	 * the disk size.
 	 *
 	 * Hybrid MBRs do not necessarily comply with this.
+	 *
+	 * Consider a bad value here to be a warning to support dd'ing
+	 * an image from a smaller disk to a larger disk.
 	 */
 	if (ret == GPT_MBR_PROTECTIVE) {
 		sz = le32_to_cpu(mbr->partition_record[part].size_in_lba);
 		if (sz != (uint32_t) total_sectors - 1 && sz != 0xFFFFFFFF)
-			ret = 0;
+			pr_debug("GPT: mbr size in lba (%u) different than whole disk (%u).\n",
+				 sz, min_t(uint32_t,
					   total_sectors - 1, 0xFFFFFFFF));
 	}
 done:
 	return ret;

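The comment added above explains why a protective-MBR size mismatch is now only reported instead of invalidating the GPT: an image dd'd from a smaller disk onto a larger one keeps the old size_in_lba. The snippet below is a minimal standalone userspace sketch of that check, not kernel code; the helper name pmbr_size_matches is invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* A protective MBR record is expected to cover either the whole disk
 * minus the MBR sector, or the 32-bit LBA maximum for disks > 2 TiB.
 * A mismatch is reported but no longer treated as fatal. */
static int pmbr_size_matches(uint32_t size_in_lba, uint64_t total_sectors)
{
        uint32_t expected = (total_sectors - 1 > 0xFFFFFFFFULL)
                                ? 0xFFFFFFFFU : (uint32_t)(total_sectors - 1);

        if (size_in_lba == expected || size_in_lba == 0xFFFFFFFFU)
                return 1;

        fprintf(stderr,
                "GPT: mbr size in lba (%u) different than whole disk (%u).\n",
                size_in_lba, expected);
        return 0;       /* caller treats this as a warning only */
}

int main(void)
{
        /* e.g. an image dd'd from a 1 GiB disk onto a 2 GiB disk */
        uint64_t total_sectors = 4194304;       /* 2 GiB of 512-byte sectors */
        uint32_t size_in_lba   = 2097152 - 1;   /* from the smaller source disk */

        pmbr_size_matches(size_in_lba, total_sectors);
        return 0;
}
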
fs/buffer.c (14 lines changed)
@@ -1005,9 +1005,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	struct buffer_head *bh;
 	sector_t end_block;
 	int ret = 0;		/* Will call free_more_memory() */
+	gfp_t gfp_mask;
 
-	page = find_or_create_page(inode->i_mapping, index,
-		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
+	gfp_mask |= __GFP_MOVABLE;
+	/*
+	 * XXX: __getblk_slow() can not really deal with failure and
+	 * will endlessly loop on improvised global reclaim. Prefer
+	 * looping in the allocator rather than here, at least that
+	 * code knows what it's doing.
+	 */
+	gfp_mask |= __GFP_NOFAIL;
+
+	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
 	if (!page)
 		return ret;
 

fs/proc/inode.c
@@ -288,10 +288,14 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
-	int rv = -EIO;
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+	unsigned long rv = -EIO;
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL;
 	if (use_pde(pde)) {
-		get_unmapped_area = pde->proc_fops->get_unmapped_area;
+#ifdef CONFIG_MMU
+		get_unmapped_area = current->mm->get_unmapped_area;
+#endif
+		if (pde->proc_fops->get_unmapped_area)
+			get_unmapped_area = pde->proc_fops->get_unmapped_area;
 		if (get_unmapped_area)
 			rv = get_unmapped_area(file, orig_addr, len, pgoff, flags);
 		unuse_pde(pde);

fs/proc/task_mmu.c
@@ -941,6 +941,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		frame = pte_pfn(pte);
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
+		if (pte_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
 	} else if (is_swap_pte(pte)) {
 		swp_entry_t entry;
 		if (pte_swp_soft_dirty(pte))
@@ -960,7 +962,7 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
-	if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
+	if ((vma->vm_flags & VM_SOFTDIRTY))
 		flags2 |= __PM_SOFT_DIRTY;
 
 	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);

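For reference, the soft-dirty state that pte_to_pagemap_entry() exports here can be observed from userspace through /proc/pid/pagemap. The program below is a small sketch of such a reader, not part of the patch; the bit positions (63 present, 62 swapped, 55 soft-dirty) follow Documentation/vm/pagemap.txt, and the helper name pagemap_entry() is invented for the example.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Bits of a 64-bit pagemap entry, per Documentation/vm/pagemap.txt. */
#define PM_PRESENT      (1ULL << 63)
#define PM_SWAP         (1ULL << 62)
#define PM_SOFT_DIRTY   (1ULL << 55)

static uint64_t pagemap_entry(void *addr)
{
        long psize = sysconf(_SC_PAGESIZE);
        uint64_t entry = 0;
        off_t off = (uintptr_t)addr / psize * sizeof(entry);
        int fd = open("/proc/self/pagemap", O_RDONLY);

        if (fd < 0 || pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
                perror("pagemap");
                exit(1);
        }
        close(fd);
        return entry;
}

int main(void)
{
        char *buf = malloc(4096);

        buf[0] = 1;             /* fault the page in and dirty it */
        uint64_t e = pagemap_entry(buf);
        printf("present=%d swapped=%d soft-dirty=%d\n",
               !!(e & PM_PRESENT), !!(e & PM_SWAP), !!(e & PM_SOFT_DIRTY));
        return 0;
}
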
include/linux/memcontrol.h
@@ -137,47 +137,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
-/**
- * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
- * @new: true to enable, false to disable
- *
- * Toggle whether a failed memcg charge should invoke the OOM killer
- * or just return -ENOMEM. Returns the previous toggle state.
- *
- * NOTE: Any path that enables the OOM killer before charging must
- *       call mem_cgroup_oom_synchronize() afterward to finalize the
- *       OOM handling and clean up.
- */
-static inline bool mem_cgroup_toggle_oom(bool new)
+static inline void mem_cgroup_oom_enable(void)
 {
-	bool old;
-
-	old = current->memcg_oom.may_oom;
-	current->memcg_oom.may_oom = new;
-
-	return old;
+	WARN_ON(current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 1;
 }
 
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
-	bool old = mem_cgroup_toggle_oom(true);
-
-	WARN_ON(old == true);
-}
-
-static inline void mem_cgroup_disable_oom(void)
-{
-	bool old = mem_cgroup_toggle_oom(false);
-
-	WARN_ON(old == false);
+	WARN_ON(!current->memcg_oom.may_oom);
+	current->memcg_oom.may_oom = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
 {
-	return p->memcg_oom.in_memcg_oom;
+	return p->memcg_oom.memcg;
 }
 
-bool mem_cgroup_oom_synchronize(void);
+bool mem_cgroup_oom_synchronize(bool wait);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -402,16 +379,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
-static inline bool mem_cgroup_toggle_oom(bool new)
-{
-	return false;
-}
-
-static inline void mem_cgroup_enable_oom(void)
+static inline void mem_cgroup_oom_enable(void)
 {
 }
 
-static inline void mem_cgroup_disable_oom(void)
+static inline void mem_cgroup_oom_disable(void)
 {
 }
 
@@ -420,7 +392,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 	return false;
 }
 
-static inline bool mem_cgroup_oom_synchronize(void)
+static inline bool mem_cgroup_oom_synchronize(bool wait)
 {
 	return false;
 }

include/linux/sched.h
@@ -1394,11 +1394,10 @@ struct task_struct {
 	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
+		struct mem_cgroup *memcg;
+		gfp_t gfp_mask;
+		int order;
 		unsigned int may_oom:1;
-		unsigned int in_memcg_oom:1;
-		unsigned int oom_locked:1;
-		int wakeups;
-		struct mem_cgroup *wait_on_memcg;
 	} memcg_oom;
 #endif
 #ifdef CONFIG_UPROBES

ipc/sem.c (42 lines changed)
@@ -1282,6 +1282,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 
 	sem_lock(sma, NULL, -1);
 
+	if (sma->sem_perm.deleted) {
+		sem_unlock(sma, -1);
+		rcu_read_unlock();
+		return -EIDRM;
+	}
+
 	curr = &sma->sem_base[semnum];
 
 	ipc_assert_locked_object(&sma->sem_perm);
@@ -1336,12 +1342,14 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		sem_lock(sma, NULL, -1);
+		if (sma->sem_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock;
+		}
 		if(nsems > SEMMSL_FAST) {
 			if (!ipc_rcu_getref(sma)) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 			sem_unlock(sma, -1);
 			rcu_read_unlock();
@@ -1354,10 +1362,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			rcu_read_lock();
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma, -1);
-				rcu_read_unlock();
 				err = -EIDRM;
-				goto out_free;
+				goto out_unlock;
 			}
 		}
 		for (i = 0; i < sma->sem_nsems; i++)
@@ -1375,8 +1381,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
@@ -1404,10 +1410,8 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1431,6 +1435,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (sma->sem_perm.deleted) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1836,6 +1844,10 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	if (sma->sem_perm.deleted)
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1843,8 +1855,6 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
@@ -2057,6 +2067,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (sma->sem_perm.deleted) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created

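The -EIDRM checks added throughout ipc/sem.c close races between semop()/semctl() and IPC_RMID. The userspace program below is a rough demonstration of the scenario, not part of the patch: one process removes the semaphore set while another is blocked in semop(), which then fails with EIDRM.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int semid = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
        struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };

        if (semid < 0) {
                perror("semget");
                return 1;
        }

        if (fork() == 0) {
                /* child: remove the set while the parent sleeps in semop() */
                sleep(1);
                semctl(semid, 0, IPC_RMID);
                _exit(0);
        }

        /* blocks because the semaphore value is 0, then fails with EIDRM */
        if (semop(semid, &op, 1) < 0)
                printf("semop failed: %s\n", strerror(errno));

        wait(NULL);
        return 0;
}
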
ipc/util.c (27 lines changed)
@@ -17,12 +17,27 @@
  *   Pavel Emelianov <xemul@openvz.org>
  *
  * General sysv ipc locking scheme:
- *	when doing ipc id lookups, take the ids->rwsem
- *	rcu_read_lock()
- *	    obtain the ipc object (kern_ipc_perm)
- *	    perform security, capabilities, auditing and permission checks, etc.
- *	    acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object()
- *	    perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
+ *	rcu_read_lock()
+ *          obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
+ *	    tree.
+ *	    - perform initial checks (capabilities, auditing and permission,
+ *	      etc).
+ *	    - perform read-only operations, such as STAT, INFO commands.
+ *	      acquire the ipc lock (kern_ipc_perm.lock) through
+ *	      ipc_lock_object()
+ *		- perform data updates, such as SET, RMID commands and
+ *		  mechanism-specific operations (semop/semtimedop,
+ *		  msgsnd/msgrcv, shmat/shmdt).
+ *	    drop the ipc lock, through ipc_unlock_object().
+ *	rcu_read_unlock()
+ *
+ *  The ids->rwsem must be taken when:
+ *	- creating, removing and iterating the existing entries in ipc
+ *	  identifier sets.
+ *	- iterating through files under /proc/sysvipc/
+ *
+ *  Note that sems have a special fast path that avoids kern_ipc_perm.lock -
+ *  see sem_lock().
  */
 
 #include <linux/mm.h>

lib/percpu-refcount.c
@@ -53,6 +53,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
 	ref->release = release;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(percpu_ref_init);
 
 /**
  * percpu_ref_cancel_init - cancel percpu_ref_init()
@@ -84,6 +85,7 @@ void percpu_ref_cancel_init(struct percpu_ref *ref)
 		free_percpu(ref->pcpu_count);
 	}
 }
+EXPORT_SYMBOL_GPL(percpu_ref_cancel_init);
 
 static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 {
@@ -156,3 +158,4 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 
 	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 }
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);

mm/filemap.c (11 lines changed)
@@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
-	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already? Either
-	 * way, try readahead, but disable the memcg OOM killer for it
-	 * as readahead is optional and no errors are propagated up
-	 * the fault stack. The OOM killer is enabled while trying to
-	 * instantiate the faulting page individually below.
+	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
-		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
-		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;

mm/huge_memory.c
@@ -2697,6 +2697,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 
 	mmun_start = haddr;
 	mmun_end = haddr + HPAGE_PMD_SIZE;
+again:
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_trans_huge(*pmd))) {
@@ -2719,7 +2720,14 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 	split_huge_page(page);
 
 	put_page(page);
-	BUG_ON(pmd_trans_huge(*pmd));
+
+	/*
+	 * We don't always have down_write of mmap_sem here: a racing
+	 * do_huge_pmd_wp_page() might have copied-on-write to another
+	 * huge page before our split_huge_page() got the anon_vma lock.
+	 */
+	if (unlikely(pmd_trans_huge(*pmd)))
+		goto again;
 }
 
 void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address,

mm/hugetlb.c (17 lines changed)
@@ -653,6 +653,7 @@ static void free_huge_page(struct page *page)
 	BUG_ON(page_count(page));
 	BUG_ON(page_mapcount(page));
 	restore_reserve = PagePrivate(page);
+	ClearPagePrivate(page);
 
 	spin_lock(&hugetlb_lock);
 	hugetlb_cgroup_uncharge_page(hstate_index(h),
@@ -695,8 +696,22 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
 	/* we rely on prep_new_huge_page to set the destructor */
 	set_compound_order(page, order);
 	__SetPageHead(page);
+	__ClearPageReserved(page);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
+		/*
+		 * For gigantic hugepages allocated through bootmem at
+		 * boot, it's safer to be consistent with the not-gigantic
+		 * hugepages and clear the PG_reserved bit from all tail pages
+		 * too. Otherwse drivers using get_user_pages() to access tail
+		 * pages may get the reference counting wrong if they see
+		 * PG_reserved set on a tail page (despite the head page not
+		 * having PG_reserved set). Enforcing this consistency between
+		 * head and tail pages allows drivers to optimize away a check
+		 * on the head page when they need know if put_page() is needed
+		 * after get_user_pages().
+		 */
+		__ClearPageReserved(p);
 		set_page_count(p, 0);
 		p->first_page = page;
 	}
@@ -1329,9 +1344,9 @@ static void __init gather_bootmem_prealloc(void)
 #else
 		page = virt_to_page(m);
 #endif
-		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
+		WARN_ON(PageReserved(page));
 		prep_new_huge_page(h, page, page_to_nid(page));
 		/*
 		 * If we had gigantic hugepages allocated at boot time, we need

mm/memcontrol.c (177 lines changed)
@@ -866,6 +866,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	unsigned long val = 0;
 	int cpu;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		val += per_cpu(memcg->stat->events[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
@@ -873,6 +874,7 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 	val += memcg->nocpu_base.events[idx];
 	spin_unlock(&memcg->pcp_counter_lock);
 #endif
+	put_online_cpus();
 	return val;
 }
 
@@ -2159,27 +2161,67 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 	memcg_wakeup_oom(memcg);
 }
 
-/*
- * try to call OOM killer
- */
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	bool locked;
-	int wakeups;
-
 	if (!current->memcg_oom.may_oom)
 		return;
-
-	current->memcg_oom.in_memcg_oom = 1;
-
 	/*
-	 * As with any blocking lock, a contender needs to start
-	 * listening for wakeups before attempting the trylock,
-	 * otherwise it can miss the wakeup from the unlock and sleep
-	 * indefinitely. This is just open-coded because our locking
-	 * is so particular to memcg hierarchies.
+	 * We are in the middle of the charge context here, so we
+	 * don't want to block when potentially sitting on a callstack
+	 * that holds all kinds of filesystem and mm locks.
+	 *
+	 * Also, the caller may handle a failed allocation gracefully
+	 * (like optional page cache readahead) and so an OOM killer
+	 * invocation might not even be necessary.
+	 *
+	 * That's why we don't do anything here except remember the
+	 * OOM context and then deal with it at the end of the page
+	 * fault when the stack is unwound, the locks are released,
+	 * and when we know whether the fault was overall successful.
 	 */
-	wakeups = atomic_read(&memcg->oom_wakeups);
+	css_get(&memcg->css);
+	current->memcg_oom.memcg = memcg;
+	current->memcg_oom.gfp_mask = mask;
+	current->memcg_oom.order = order;
+}
+
+/**
+ * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ * @handle: actually kill/wait or just clean up the OOM state
+ *
+ * This has to be called at the end of a page fault if the memcg OOM
+ * handler was enabled.
+ *
+ * Memcg supports userspace OOM handling where failed allocations must
+ * sleep on a waitqueue until the userspace task resolves the
+ * situation. Sleeping directly in the charge context with all kinds
+ * of locks held is not a good idea, instead we remember an OOM state
+ * in the task and mem_cgroup_oom_synchronize() has to be called at
+ * the end of the page fault to complete the OOM handling.
+ *
+ * Returns %true if an ongoing memcg OOM situation was detected and
+ * completed, %false otherwise.
+ */
+bool mem_cgroup_oom_synchronize(bool handle)
+{
+	struct mem_cgroup *memcg = current->memcg_oom.memcg;
+	struct oom_wait_info owait;
+	bool locked;
+
+	/* OOM is global, do not handle */
+	if (!memcg)
+		return false;
+
+	if (!handle)
+		goto cleanup;
+
+	owait.memcg = memcg;
+	owait.wait.flags = 0;
+	owait.wait.func = memcg_oom_wake_function;
+	owait.wait.private = current;
+	INIT_LIST_HEAD(&owait.wait.task_list);
+
+	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
 	mem_cgroup_mark_under_oom(memcg);
 
 	locked = mem_cgroup_oom_trylock(memcg);
@@ -2189,95 +2231,16 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 
 	if (locked && !memcg->oom_kill_disable) {
 		mem_cgroup_unmark_under_oom(memcg);
-		mem_cgroup_out_of_memory(memcg, mask, order);
-		mem_cgroup_oom_unlock(memcg);
-		/*
-		 * There is no guarantee that an OOM-lock contender
-		 * sees the wakeups triggered by the OOM kill
-		 * uncharges. Wake any sleepers explicitely.
-		 */
-		memcg_oom_recover(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+		mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
					 current->memcg_oom.order);
 	} else {
-		/*
-		 * A system call can just return -ENOMEM, but if this
-		 * is a page fault and somebody else is handling the
-		 * OOM already, we need to sleep on the OOM waitqueue
-		 * for this memcg until the situation is resolved.
-		 * Which can take some time because it might be
-		 * handled by a userspace task.
-		 *
-		 * However, this is the charge context, which means
-		 * that we may sit on a large call stack and hold
-		 * various filesystem locks, the mmap_sem etc. and we
-		 * don't want the OOM handler to deadlock on them
-		 * while we sit here and wait. Store the current OOM
-		 * context in the task_struct, then return -ENOMEM.
-		 * At the end of the page fault handler, with the
-		 * stack unwound, pagefault_out_of_memory() will check
-		 * back with us by calling
-		 * mem_cgroup_oom_synchronize(), possibly putting the
-		 * task to sleep.
-		 */
-		current->memcg_oom.oom_locked = locked;
-		current->memcg_oom.wakeups = wakeups;
-		css_get(&memcg->css);
-		current->memcg_oom.wait_on_memcg = memcg;
-	}
-}
-
-/**
- * mem_cgroup_oom_synchronize - complete memcg OOM handling
- *
- * This has to be called at the end of a page fault if the the memcg
- * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
- *
- * Memcg supports userspace OOM handling, so failed allocations must
- * sleep on a waitqueue until the userspace task resolves the
- * situation. Sleeping directly in the charge context with all kinds
- * of locks held is not a good idea, instead we remember an OOM state
- * in the task and mem_cgroup_oom_synchronize() has to be called at
- * the end of the page fault to put the task to sleep and clean up the
- * OOM state.
- *
- * Returns %true if an ongoing memcg OOM situation was detected and
- * finalized, %false otherwise.
- */
-bool mem_cgroup_oom_synchronize(void)
-{
-	struct oom_wait_info owait;
-	struct mem_cgroup *memcg;
-
-	/* OOM is global, do not handle */
-	if (!current->memcg_oom.in_memcg_oom)
-		return false;
-
-	/*
-	 * We invoked the OOM killer but there is a chance that a kill
-	 * did not free up any charges. Everybody else might already
-	 * be sleeping, so restart the fault and keep the rampage
-	 * going until some charges are released.
-	 */
-	memcg = current->memcg_oom.wait_on_memcg;
-	if (!memcg)
-		goto out;
-
-	if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
-		goto out_memcg;
-
-	owait.memcg = memcg;
-	owait.wait.flags = 0;
-	owait.wait.func = memcg_oom_wake_function;
-	owait.wait.private = current;
-	INIT_LIST_HEAD(&owait.wait.task_list);
-
-	prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-	/* Only sleep if we didn't miss any wakeups since OOM */
-	if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
 		schedule();
-	finish_wait(&memcg_oom_waitq, &owait.wait);
-out_memcg:
-	mem_cgroup_unmark_under_oom(memcg);
-	if (current->memcg_oom.oom_locked) {
+		mem_cgroup_unmark_under_oom(memcg);
+		finish_wait(&memcg_oom_waitq, &owait.wait);
+	}
+
+	if (locked) {
 		mem_cgroup_oom_unlock(memcg);
 		/*
 		 * There is no guarantee that an OOM-lock contender
@@ -2286,10 +2249,9 @@ bool mem_cgroup_oom_synchronize(void)
 		 */
 		memcg_oom_recover(memcg);
 	}
+cleanup:
+	current->memcg_oom.memcg = NULL;
 	css_put(&memcg->css);
-	current->memcg_oom.wait_on_memcg = NULL;
-out:
-	current->memcg_oom.in_memcg_oom = 0;
 	return true;
 }
 
@@ -2703,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 		     || fatal_signal_pending(current)))
 		goto bypass;
 
+	if (unlikely(task_in_memcg_oom(current)))
+		goto bypass;
+
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
@@ -2801,6 +2766,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	return 0;
 nomem:
 	*ptr = NULL;
+	if (gfp_mask & __GFP_NOFAIL)
+		return 0;
 	return -ENOMEM;
 bypass:
 	*ptr = root_mem_cgroup;

mm/memory.c (20 lines changed)
@@ -837,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 */
 			make_migration_entry_read(&entry);
 			pte = swp_entry_to_pte(entry);
+			if (pte_swp_soft_dirty(*src_pte))
+				pte = pte_swp_mksoft_dirty(pte);
 			set_pte_at(src_mm, addr, src_pte, pte);
 		}
 	}
@@ -3863,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * space. Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_enable_oom();
+		mem_cgroup_oom_enable();
 
 	ret = __handle_mm_fault(mm, vma, address, flags);
 
-	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_disable_oom();
-
-	if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
-		mem_cgroup_oom_synchronize();
+	if (flags & FAULT_FLAG_USER) {
+		mem_cgroup_oom_disable();
+		/*
+		 * The task may have entered a memcg OOM situation but
+		 * if the allocation error was handled gracefully (no
+		 * VM_FAULT_OOM), there is no need to kill anything.
+		 * Just clean up the OOM state peacefully.
+		 */
+		if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+			mem_cgroup_oom_synchronize(false);
+	}
 
 	return ret;
 }

mm/migrate.c
@@ -161,6 +161,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
+	if (pte_swp_soft_dirty(*ptep))
+		pte = pte_mksoft_dirty(pte);
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
 #ifdef CONFIG_HUGETLB_PAGE

mm/mprotect.c
@@ -94,13 +94,16 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
+				pte_t newpte;
 				/*
 				 * A protection check is difficult so
 				 * just be safe and disable write
 				 */
 				make_migration_entry_read(&entry);
-				set_pte_at(mm, addr, pte,
-					swp_entry_to_pte(entry));
+				newpte = swp_entry_to_pte(entry);
+				if (pte_swp_soft_dirty(oldpte))
+					newpte = pte_swp_mksoft_dirty(newpte);
+				set_pte_at(mm, addr, pte, newpte);
 			}
 			pages++;
 		}

mm/mremap.c
@@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
 
 #include "internal.h"
 
@@ -63,10 +62,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 		return NULL;
 
 	pmd = pmd_alloc(mm, pud, addr);
-	if (!pmd) {
-		pud_free(mm, pud);
+	if (!pmd)
 		return NULL;
-	}
 
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 

mm/oom_kill.c
@@ -680,7 +680,7 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
-	if (mem_cgroup_oom_synchronize())
+	if (mem_cgroup_oom_synchronize(true))
 		return;
 
 	zonelist = node_zonelist(first_online_node, GFP_KERNEL);

mm/page-writeback.c
@@ -1210,11 +1210,11 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
-static long bdi_max_pause(struct backing_dev_info *bdi,
+static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
			  unsigned long bdi_dirty)
 {
-	long bw = bdi->avg_write_bandwidth;
-	long t;
+	unsigned long bw = bdi->avg_write_bandwidth;
+	unsigned long t;
 
 	/*
 	 * Limit pause time for small memory systems. If sleeping for too long
@@ -1226,7 +1226,7 @@ static long bdi_max_pause(struct backing_dev_info *bdi,
 	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
 	t++;
 
-	return min_t(long, t, MAX_PAUSE);
+	return min_t(unsigned long, t, MAX_PAUSE);
 }
 
 static long bdi_min_pause(struct backing_dev_info *bdi,

mm/swapfile.c
@@ -1824,6 +1824,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct filename *pathname;
 	int i, type, prev;
 	int err;
+	unsigned int old_block_size;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1914,6 +1915,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	}
 
 	swap_file = p->swap_file;
+	old_block_size = p->old_block_size;
 	p->swap_file = NULL;
 	p->max = 0;
 	swap_map = p->swap_map;
@@ -1938,7 +1940,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
 		struct block_device *bdev = I_BDEV(inode);
-		set_blocksize(bdev, p->old_block_size);
+		set_blocksize(bdev, old_block_size);
 		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 	} else {
 		mutex_lock(&inode->i_mutex);

mm/vmscan.c
@@ -211,6 +211,7 @@ void unregister_shrinker(struct shrinker *shrinker)
 	down_write(&shrinker_rwsem);
 	list_del(&shrinker->list);
 	up_write(&shrinker_rwsem);
+	kfree(shrinker->nr_deferred);
 }
 EXPORT_SYMBOL(unregister_shrinker);
 

mm/zswap.c
@@ -804,6 +804,10 @@ static void zswap_frontswap_invalidate_area(unsigned type)
 	}
 	tree->rbroot = RB_ROOT;
 	spin_unlock(&tree->lock);
+
+	zbud_destroy_pool(tree->pool);
+	kfree(tree);
+	zswap_trees[type] = NULL;
 }
 
 static struct zbud_ops zswap_zbud_ops = {

tools/testing/selftests/timers/posix_timers.c
@@ -151,7 +151,7 @@ static int check_timer_create(int which)
 	fflush(stdout);
 
 	done = 0;
-	timer_create(which, NULL, &id);
+	err = timer_create(which, NULL, &id);
 	if (err < 0) {
 		perror("Can't create timer\n");
 		return -1;

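The selftest fix above simply stores the return value of timer_create() before testing it, instead of checking an uninitialized err. As a standalone illustration (not taken from the kernel tree), a minimal caller that checks the result might look like the sketch below; link with -lrt on older glibc.

#include <stdio.h>
#include <time.h>

int main(void)
{
        timer_t id;

        /* timer_create() returns 0 on success, -1 with errno set on error */
        if (timer_create(CLOCK_MONOTONIC, NULL, &id) < 0) {
                perror("timer_create");
                return 1;
        }

        printf("timer created\n");
        timer_delete(id);
        return 0;
}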