BACKPORT: FROMGIT: mm: enable page walking API to lock vmas during the walk
walk_page_range() and friends often operate under write-locked mmap_lock. With
the introduction of vma locks, the vmas have to be locked as well during such
walks to prevent concurrent page faults in these areas. Add an additional
member to mm_walk_ops to indicate the locking requirements for the walk.

The change ensures that page walks which prevent concurrent page faults by
write-locking mmap_lock operate correctly after the introduction of per-vma
locks. With per-vma locks, page faults can be handled under a vma lock without
taking mmap_lock at all, so write-locking mmap_lock alone would not stop them.
The change ensures vmas are properly locked during such walks.

A sample issue this solves is do_mbind() performing queue_pages_range() to
queue pages for migration. Without this change a concurrent page can be
faulted into the area and be left out of the migration.

Link: https://lkml.kernel.org/r/20230804152724.3090321-2-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Jann Horn <jannh@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michel Lespinasse <michel@lespinasse.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

(cherry picked from commit 2ebc368f59eedcef0de7c832fe1d62935cd3a7ff
 https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-unstable)
[surenb: changed locking in break_ksm since it's done differently,
 skipped the change in the missing __ksm_del_vma(),
 skipped the change in the missing walk_page_range_vma(),
 removed unused local variables]
Bug: 293665307
Change-Id: Iede9eaa950ea59a268a2e74a8d3022162f0bbd80
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
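For reference, below is a minimal sketch of how a page-walk user selects a lock
mode under the new walk_lock API. It is not part of this patch; the names
count_pte_entry, count_walk_ops and count_present_ptes are invented for the
example, which simply counts present PTEs under a read-locked mmap_lock
(PGWALK_RDLOCK), so no per-VMA write lock is taken by the walker.

#include <linux/mm.h>
#include <linux/pagewalk.h>

/* Illustrative callback: count present PTEs in the walked range. */
static int count_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (pte_present(ptep_get(pte)))
		(*count)++;
	return 0;
}

static const struct mm_walk_ops count_walk_ops = {
	.pte_entry = count_pte_entry,
	/* Read-only walk: mmap_lock held for read is enough. */
	.walk_lock = PGWALK_RDLOCK,
};

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long start, unsigned long end)
{
	unsigned long count = 0;

	mmap_read_lock(mm);
	walk_page_range(mm, start, end, &count_walk_ops, &count);
	mmap_read_unlock(mm);

	return count;
}

A walker that modifies page tables would instead pick PGWALK_WRLOCK (the walk
write-locks each vma) or PGWALK_WRLOCK_VERIFY (the caller has already
write-locked the vma), as the hunks below do.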
commit 3c187b4a12 (parent b6093c47fe)
@@ -143,6 +143,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops subpage_walk_ops = {
 	.pmd_entry = subpage_walk_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK_VERIFY,
 };
 
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
 	.pmd_entry = pageattr_pmd_entry,
 	.pte_entry = pageattr_pte_entry,
 	.pte_hole = pageattr_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
@@ -2510,6 +2510,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops thp_split_walk_ops = {
 	.pmd_entry = thp_split_walk_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
@@ -2554,6 +2555,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops zap_zero_walk_ops = {
 	.pmd_entry = __zap_zero_pages,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
 	.hugetlb_entry = __s390_enable_skey_hugetlb,
 	.pte_entry = __s390_enable_skey_pte,
 	.pmd_entry = __s390_enable_skey_pmd,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 
 static const struct mm_walk_ops reset_cmma_walk_ops = {
 	.pte_entry = __s390_reset_cmma,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
 
 static const struct mm_walk_ops gather_pages_ops = {
 	.pte_entry = s390_gather_pages,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -758,12 +758,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops smaps_walk_ops = {
 	.pmd_entry = smaps_pte_range,
 	.hugetlb_entry = smaps_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static const struct mm_walk_ops smaps_shmem_walk_ops = {
 	.pmd_entry = smaps_pte_range,
 	.hugetlb_entry = smaps_hugetlb_range,
 	.pte_hole = smaps_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -1247,6 +1249,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 static const struct mm_walk_ops clear_refs_walk_ops = {
 	.pmd_entry = clear_refs_pte_range,
 	.test_walk = clear_refs_test_walk,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1623,6 +1626,7 @@ static const struct mm_walk_ops pagemap_ops = {
 	.pmd_entry = pagemap_pmd_range,
 	.pte_hole = pagemap_pte_hole,
 	.hugetlb_entry = pagemap_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -1929,6 +1933,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops show_numa_ops = {
 	.hugetlb_entry = gather_hugetlb_stats,
 	.pmd_entry = gather_pte_stats,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -6,6 +6,16 @@
 
 struct mm_walk;
 
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+	/* mmap_lock should be locked for read to stabilize the vma tree */
+	PGWALK_RDLOCK = 0,
+	/* vma will be write-locked during the walk */
+	PGWALK_WRLOCK = 1,
+	/* vma is expected to be already write-locked during the walk */
+	PGWALK_WRLOCK_VERIFY = 2,
+};
+
 /**
  * struct mm_walk_ops - callbacks for walk_page_range
  * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
@@ -55,6 +65,7 @@ struct mm_walk_ops {
 	int (*pre_vma)(unsigned long start, unsigned long end,
 		       struct mm_walk *walk);
 	void (*post_vma)(struct mm_walk *walk);
+	enum page_walk_lock walk_lock;
 };
 
 /*
@@ -384,6 +384,7 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops damon_mkold_ops = {
 	.pmd_entry = damon_mkold_pmd_entry,
 	.hugetlb_entry = damon_mkold_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -521,6 +522,7 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops damon_young_ops = {
 	.pmd_entry = damon_young_pmd_entry,
 	.hugetlb_entry = damon_young_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
mm/hmm.c
@@ -548,6 +548,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 	.pte_hole = hmm_vma_walk_hole,
 	.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
 	.test_walk = hmm_vma_walk_test,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /**
mm/ksm.c
@@ -434,13 +434,18 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
 {
 	struct page *page;
 	vm_fault_t ret = 0;
 
 	do {
 		cond_resched();
+		if (lock_vma)
+			vma_start_write(vma);
+		else
+			mmap_assert_locked(vma->vm_mm);
+
 		page = follow_page(vma, addr,
 				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
 		if (IS_ERR_OR_NULL(page))
@@ -511,7 +516,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
 	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (vma)
-		break_ksm(vma, addr);
+		break_ksm(vma, addr, false);
 	mmap_read_unlock(mm);
 }
 
@@ -814,7 +819,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool lock_vma)
 {
 	unsigned long addr;
 	int err = 0;
@@ -825,7 +830,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
-			err = break_ksm(vma, addr);
+			err = break_ksm(vma, addr, lock_vma);
 	}
 	return err;
 }
@@ -972,7 +977,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 		if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 			continue;
 		err = unmerge_ksm_pages(vma,
-					vma->vm_start, vma->vm_end);
+					vma->vm_start, vma->vm_end, false);
 		if (err)
 			goto error;
 	}
@@ -2487,7 +2492,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		return 0;	/* just ignore the advice */
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, start, end);
+		err = unmerge_ksm_pages(vma, start, end, true);
 		if (err)
 			return err;
 	}
@@ -234,6 +234,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops swapin_walk_ops = {
 	.pmd_entry = swapin_walk_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void force_shm_swapin_readahead(struct vm_area_struct *vma,
@@ -541,6 +542,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops cold_walk_ops = {
 	.pmd_entry = madvise_cold_or_pageout_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -763,6 +765,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops madvise_free_walk_ops = {
 	.pmd_entry = madvise_free_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
@@ -5966,6 +5966,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
 	.pmd_entry = mem_cgroup_count_precharge_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6242,6 +6243,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops charge_walk_ops = {
 	.pmd_entry = mem_cgroup_move_charge_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)
@@ -722,6 +722,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 static const struct mm_walk_ops hwp_walk_ops = {
 	.pmd_entry = hwpoison_pte_range,
 	.hugetlb_entry = hwpoison_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -709,6 +709,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 	.hugetlb_entry = queue_pages_hugetlb,
 	.pmd_entry = queue_pages_pte_range,
 	.test_walk = queue_pages_test_walk,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+	.hugetlb_entry = queue_pages_hugetlb,
+	.pmd_entry = queue_pages_pte_range,
+	.test_walk = queue_pages_test_walk,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -729,7 +737,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		struct list_head *pagelist, bool lock_vma)
 {
 	int err;
 	struct queue_pages qp = {
@@ -740,8 +748,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		.end = end,
 		.first = NULL,
 	};
+	const struct mm_walk_ops *ops = lock_vma ?
+			&queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
 
-	err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+	err = walk_page_range(mm, start, end, ops, &qp);
 
 	if (!qp.first)
 		/* whole range in hole */
@@ -1086,7 +1096,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 	vma = find_vma(mm, 0);
 	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
 	queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
-			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+			flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1263,8 +1273,6 @@ static long do_mbind(unsigned long start, unsigned long len,
 		nodemask_t *nmask, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	struct vma_iterator vmi;
 	struct mempolicy *new;
 	unsigned long end;
 	int err;
@@ -1330,12 +1338,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 	 * Lock the VMAs before scanning for pages to migrate, to ensure we don't
 	 * miss a concurrently inserted page.
 	 */
-	vma_iter_init(&vmi, mm, start);
-	for_each_vma_range(vmi, vma, end)
-		vma_start_write(vma);
-
 	ret = queue_pages_range(mm, start, end, nmask,
-			  flags | MPOL_MF_INVERT, &pagelist);
+			  flags | MPOL_MF_INVERT, &pagelist, true);
 
 	if (ret < 0) {
 		err = ret;
@@ -286,6 +286,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 static const struct mm_walk_ops migrate_vma_walk_ops = {
 	.pmd_entry = migrate_vma_collect_pmd,
 	.pte_hole = migrate_vma_collect_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -177,6 +177,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
 	.pmd_entry = mincore_pte_range,
 	.pte_hole = mincore_unmapped_range,
 	.hugetlb_entry = mincore_hugetlb,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -365,6 +365,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
 {
 	static const struct mm_walk_ops mlock_walk_ops = {
 		.pmd_entry = mlock_pte_range,
+		.walk_lock = PGWALK_WRLOCK_VERIFY,
 	};
 
 	/*
@@ -542,6 +542,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
 	.pte_entry = prot_none_pte_entry,
 	.hugetlb_entry = prot_none_hugetlb_entry,
 	.test_walk = prot_none_test,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 int
@@ -384,6 +384,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 	return err;
 }
 
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+					enum page_walk_lock walk_lock)
+{
+	if (walk_lock == PGWALK_RDLOCK)
+		mmap_assert_locked(mm);
+	else
+		mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+					 enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+	switch (walk_lock) {
+	case PGWALK_WRLOCK:
+		vma_start_write(vma);
+		break;
+	case PGWALK_WRLOCK_VERIFY:
+		vma_assert_write_locked(vma);
+		break;
+	case PGWALK_RDLOCK:
+		/* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+		break;
+	}
+#endif
+}
+
 /**
  * walk_page_range - walk page table with caller specific callbacks
  * @mm:		mm_struct representing the target process of page table walk
@@ -443,7 +470,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 	if (!walk.mm)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
 
 	vma = find_vma(walk.mm, start);
 	do {
@@ -458,6 +485,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
 			if (ops->pte_hole)
 				err = ops->pte_hole(start, next, -1, &walk);
 		} else { /* inside vma */
+			process_vma_walk_lock(vma, ops->walk_lock);
 			walk.vma = vma;
 			next = min(end, vma->vm_end);
 			vma = find_vma(mm, vma->vm_end);
@@ -531,7 +559,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 	if (!walk.mm)
 		return -EINVAL;
 
-	mmap_assert_locked(walk.mm);
+	process_mm_walk_lock(walk.mm, ops->walk_lock);
+	process_vma_walk_lock(vma, ops->walk_lock);
 
 	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
 	if (err > 0)
@@ -4214,6 +4214,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
 	static const struct mm_walk_ops mm_walk_ops = {
 		.test_walk = should_skip_vma,
 		.p4d_entry = walk_pud_range,
+		.walk_lock = PGWALK_RDLOCK,
 	};
 
 	int err;