Make sure that mlocked pages also live on the unevictable LRU, so kswapd will not scan them over and over again. This is achieved through various strategies: 1) add yet another page flag--PG_mlocked--to indicate that the page is locked for efficient testing in vmscan and, optionally, fault path. This allows early culling of unevictable pages, preventing them from getting to page_referenced()/try_to_unmap(). Also allows separate accounting of mlock'd pages, as Nick's original patch did. Note: Nick's original mlock patch used a PG_mlocked flag. I had removed this in favor of the PG_unevictable flag + an mlock_count [new page struct member]. I restored the PG_mlocked flag to eliminate the new count field. 2) add the mlock/unevictable infrastructure to mm/mlock.c, with internal APIs in mm/internal.h. This is a rework of Nick's original patch to these files, taking into account that mlocked pages are now kept on the unevictable LRU list. 3) update vmscan.c:page_evictable() to check PageMlocked() and, if vma passed in, the vm_flags. Note that the vma will only be passed in for new pages in the fault path; and then only if the "cull unevictable pages in fault path" patch is included. 4) add try_to_munlock() to rmap.c to walk a page's rmap and ClearPageMlocked() if no other vmas have it mlocked. Reuses as much of try_to_unmap() as possible. This effectively replaces the use of one of the lru list links as an mlock count. If this mechanism lets pages in mlocked vmas leak through w/o PG_mlocked set [I don't know that it does], we should catch them later in try_to_unmap(). One hopes this will be rare, as it will be relatively expensive. Original mm/internal.h, mm/rmap.c and mm/mlock.c changes: Signed-off-by: Nick Piggin <npiggin@suse.de> splitlru: introduce __get_user_pages(): New munlock processing needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS, because the current get_user_pages() can't grab PROT_NONE pages and therefore PROT_NONE pages can't be munlocked.
[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch] [akpm@linux-foundation.org: untangle patch interdependencies] [akpm@linux-foundation.org: fix things after out-of-order merging] [hugh@veritas.com: fix page-flags mess] [lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm'] [kosaki.motohiro@jp.fujitsu.com: build fix] [kosaki.motohiro@jp.fujitsu.com: fix truncate race and several comments] [kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()] Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Nick Piggin <npiggin@suse.de> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Matt Mackall <mpm@selenic.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
159 lines
4.3 KiB
C
159 lines
4.3 KiB
C
#ifndef _LINUX_RMAP_H
|
|
#define _LINUX_RMAP_H
|
|
/*
|
|
* Declarations for Reverse Mapping functions in mm/rmap.c
|
|
*/
|
|
|
|
#include <linux/list.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/memcontrol.h>
|
|
|
|
/*
|
|
* The anon_vma heads a list of private "related" vmas, to scan if
|
|
* an anonymous page pointing to this anon_vma needs to be unmapped:
|
|
* the vmas on the list will be related by forking, or by splitting.
|
|
*
|
|
* Since vmas come and go as they are split and merged (particularly
|
|
* in mprotect), the mapping field of an anonymous page cannot point
|
|
* directly to a vma: instead it points to an anon_vma, on whose list
|
|
* the related vmas can be easily linked or unlinked.
|
|
*
|
|
* After unlinking the last vma on the list, we must garbage collect
|
|
* the anon_vma object itself: we're guaranteed no page can be
|
|
* pointing to this anon_vma once its vma list is empty.
|
|
*/
|
|
struct anon_vma {
|
|
spinlock_t lock; /* Serialize access to vma list */
|
|
/*
|
|
* NOTE: the LSB of the head.next is set by
|
|
* mm_take_all_locks() _after_ taking the above lock. So the
|
|
* head must only be read/written after taking the above lock
|
|
* to be sure to see a valid next pointer. The LSB bit itself
|
|
* is serialized by a system wide lock only visible to
|
|
* mm_take_all_locks() (mm_all_locks_mutex).
|
|
*/
|
|
struct list_head head; /* List of private "related" vmas */
|
|
};
|
|
|
|
#ifdef CONFIG_MMU
|
|
|
|
/*
 * Cache from which struct anon_vma objects are allocated and to which
 * they are returned; see anon_vma_alloc()/anon_vma_free().  Per the
 * note on anon_vma_init(), that function creates it.
 */
extern struct kmem_cache *anon_vma_cachep;
|
|
|
|
static inline struct anon_vma *anon_vma_alloc(void)
|
|
{
|
|
return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
|
|
}
|
|
|
|
static inline void anon_vma_free(struct anon_vma *anon_vma)
|
|
{
|
|
kmem_cache_free(anon_vma_cachep, anon_vma);
|
|
}
|
|
|
|
static inline void anon_vma_lock(struct vm_area_struct *vma)
|
|
{
|
|
struct anon_vma *anon_vma = vma->anon_vma;
|
|
if (anon_vma)
|
|
spin_lock(&anon_vma->lock);
|
|
}
|
|
|
|
static inline void anon_vma_unlock(struct vm_area_struct *vma)
|
|
{
|
|
struct anon_vma *anon_vma = vma->anon_vma;
|
|
if (anon_vma)
|
|
spin_unlock(&anon_vma->lock);
|
|
}
|
|
|
|
/*
 * anon_vma helper functions.
 *
 * Only the prototypes live here; the definitions are out of line
 * (the header comment at the top of this file points at mm/rmap.c).
 */
void anon_vma_init(void);	/* create anon_vma_cachep */
int anon_vma_prepare(struct vm_area_struct *);
void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
void anon_vma_unlink(struct vm_area_struct *);
void anon_vma_link(struct vm_area_struct *);
void __anon_vma_link(struct vm_area_struct *);
|
|
|
|
/*
 * rmap interfaces called when adding or removing pte of page
 */
void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_file_rmap(struct page *);
void page_remove_rmap(struct page *, struct vm_area_struct *);
|
|
|
|
#ifdef CONFIG_DEBUG_VM
|
|
void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
|
|
#else
|
|
static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
|
|
{
|
|
atomic_inc(&page->_mapcount);
|
|
}
|
|
#endif
|
|
|
|
/*
 * Called from mm/vmscan.c to handle paging out
 *
 * try_to_unmap() reports its outcome with the SWAP_* codes defined at
 * the bottom of this header.
 */
int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt);
int try_to_unmap(struct page *, int ignore_refs);
|
|
|
|
/*
|
|
* Called from mm/filemap_xip.c to unmap empty zero page
|
|
*/
|
|
pte_t *page_check_address(struct page *, struct mm_struct *,
|
|
unsigned long, spinlock_t **, int);
|
|
|
|
/*
 * Used by swapoff to help locate where page is expected in vma.
 */
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
|
|
|
|
/*
 * Cleans the PTEs of shared mappings.
 * (and since clean PTEs should also be readonly, write protects them too)
 *
 * returns the number of cleaned PTEs.
 */
int page_mkclean(struct page *);
|
|
|
|
#ifdef CONFIG_UNEVICTABLE_LRU
/*
 * called in munlock()/munmap() path to check for other vmas holding
 * the page mlocked.
 */
int try_to_munlock(struct page *);
#else
/*
 * Stub used when CONFIG_UNEVICTABLE_LRU is not set: @page is ignored
 * and success is always reported.
 *
 * The literal 0 is used rather than SWAP_SUCCESS because the SWAP_*
 * codes are only #defined further down in this header.
 */
static inline int try_to_munlock(struct page *page)
{
	return 0;	/* a.k.a. SWAP_SUCCESS */
}
#endif
|
|
|
|
#else /* !CONFIG_MMU */
|
|
|
|
/*
 * !CONFIG_MMU replacements for the rmap entry points.
 *
 * These are macros, not functions: anon_vma_init() and anon_vma_link()
 * expand to empty statements, anon_vma_prepare() always yields 0, and
 * none of the three evaluates its argument.  page_referenced() only
 * forwards 'page' to TestClearPageReferenced() and drops the other two
 * arguments; try_to_unmap() drops both arguments and yields SWAP_FAIL
 * (defined at the bottom of this header, so it resolves at the point
 * of use).
 */
#define anon_vma_init()		do {} while (0)
#define anon_vma_prepare(vma)	(0)
#define anon_vma_link(vma)	do {} while (0)

#define page_referenced(page,l,cnt) TestClearPageReferenced(page)
#define try_to_unmap(page, refs) SWAP_FAIL
|
|
|
|
/*
 * !CONFIG_MMU stub: @page is ignored and 0 is always returned, i.e. no
 * PTEs are reported as cleaned.
 */
static inline int page_mkclean(struct page *page)
{
	return 0;
}
|
|
|
|
|
|
#endif /* CONFIG_MMU */
|
|
|
|
/*
 * Return values of try_to_unmap
 *
 * The try_to_munlock() stub earlier in this header also returns 0,
 * i.e. SWAP_SUCCESS.  These are kept as plain macros because the
 * !CONFIG_MMU try_to_unmap() macro above expands to SWAP_FAIL.
 */
#define SWAP_SUCCESS	0
#define SWAP_AGAIN	1
#define SWAP_FAIL	2
#define SWAP_MLOCK	3
|
|
|
|
#endif /* _LINUX_RMAP_H */
|