Merge 0c563f1480 ("proc: remove VMA rbtree use from nommu") into android-mainline

Steps on the way to 6.1-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I5edff6b45f59f4eed3422bb2e2079c54af885e3f
commit d7b5f1b219
Greg Kroah-Hartman, 2022-10-20 14:21:03 +02:00
12 changed files with 587 additions and 338 deletions

arch/x86/kernel/tboot.c

@@ -96,6 +96,7 @@ void __init tboot_probe(void)
static pgd_t *tboot_pg_dir;
static struct mm_struct tboot_mm = {
.mm_rb = RB_ROOT,
.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock),
.pgd = swapper_pg_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),

drivers/firmware/efi/efi.c

@@ -60,6 +60,7 @@ static unsigned long __initdata initrd = EFI_INVALID_TABLE_ADDR;
struct mm_struct efi_mm = {
.mm_rb = RB_ROOT,
.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
.write_protect_seq = SEQCNT_ZERO(efi_mm.write_protect_seq),
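
Both tboot_mm and efi_mm are mm_structs defined at build time, so their maple trees are initialised statically with MTREE_INIT_EXT(), using MM_MT_FLAGS and the mm's own mmap_lock as the tree's external lock. A minimal sketch of the same pattern for a hypothetical demo_mm (not part of this diff, shown only to illustrate the idiom):

#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>

/* Hypothetical statically-defined mm, mirroring tboot_mm/efi_mm above. */
static struct mm_struct demo_mm = {
	.mm_mt		= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, demo_mm.mmap_lock),
	.mm_users	= ATOMIC_INIT(2),
	.mm_count	= ATOMIC_INIT(1),
	MMAP_LOCK_INITIALIZER(demo_mm)
};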

fs/proc/task_nommu.c

@@ -20,15 +20,13 @@
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
bytes += kobjsize(vma);
region = vma->vm_region;
@@ -82,15 +80,13 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long task_vsize(struct mm_struct *mm)
{
VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct rb_node *p;
unsigned long vsize = 0;
mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
for_each_vma(vmi, vma)
vsize += vma->vm_end - vma->vm_start;
}
mmap_read_unlock(mm);
return vsize;
}
@@ -99,14 +95,13 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long size = kobjsize(mm);
mmap_read_lock(mm);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
for_each_vma(vmi, vma) {
size += kobjsize(vma);
region = vma->vm_region;
if (region) {
@@ -190,17 +185,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
*/
static int show_map(struct seq_file *m, void *_p)
{
struct rb_node *p = _p;
return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
return nommu_vma_show(m, _p);
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
struct mm_struct *mm;
struct rb_node *p;
loff_t n = *pos;
struct vm_area_struct *vma;
unsigned long addr = *pos;
/* See m_next(). Zero at the start or after lseek. */
if (addr == -1UL)
return NULL;
/* pin the task and mm whilst we play with them */
priv->task = get_proc_task(priv->inode);
@@ -216,10 +213,10 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-EINTR);
}
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
return p;
/* start the next element from addr */
vma = find_vma(mm, addr);
if (vma)
return vma;
mmap_read_unlock(mm);
mmput(mm);
@@ -242,10 +239,10 @@ static void m_stop(struct seq_file *m, void *_vml)
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
struct rb_node *p = _p;
struct vm_area_struct *vma = _p;
(*pos)++;
return p ? rb_next(p) : NULL;
*pos = vma->vm_end;
return find_vma(vma->vm_mm, vma->vm_end);
}
static const struct seq_operations proc_pid_maps_ops = {
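
The /proc nommu code above drops the mm_rb walk entirely: task_mem(), task_vsize() and task_statm() now use the VMA iterator, and the seq_file cursor becomes an address handed to find_vma() rather than an rbtree position. A minimal sketch of the new walk pattern, as a hypothetical helper that is not in the diff:

#include <linux/mm.h>
#include <linux/mm_types.h>

/* Hypothetical helper: count VMAs the way the converted code walks them. */
static unsigned long demo_count_vmas(struct mm_struct *mm)
{
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;
	unsigned long nr = 0;

	mmap_read_lock(mm);
	for_each_vma(vmi, vma)
		nr++;
	mmap_read_unlock(mm);

	return nr;
}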

include/linux/mm.h

@@ -661,6 +661,38 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
static inline
struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
{
return mas_find(&vmi->mas, max);
}
static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
{
/*
* Uses vma_find() to get the first VMA when the iterator starts.
* Calling mas_next() could skip the first entry.
*/
return vma_find(vmi, ULONG_MAX);
}
static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
{
return mas_prev(&vmi->mas, 0);
}
static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
return vmi->mas.index;
}
#define for_each_vma(__vmi, __vma) \
while (((__vma) = vma_next(&(__vmi))) != NULL)
/* The MM code likes to work with exclusive end addresses */
#define for_each_vma_range(__vmi, __vma, __end) \
while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL)
#ifdef CONFIG_SHMEM
/*
* The vma_is_shmem is not inline because it is used only by slow
@@ -2630,6 +2662,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
unsigned long start,
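
for_each_vma_range() takes an exclusive end, as the comment above notes, and internally passes end - 1 to vma_find(). A short hypothetical example of walking only the VMAs below a ceiling (illustrative, not in the diff; the caller is assumed to hold mmap_lock):

#include <linux/mm.h>

/* Hypothetical: sum the pages mapped below 'ceiling' (exclusive). */
static unsigned long demo_pages_below(struct mm_struct *mm,
				      unsigned long ceiling)
{
	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;
	unsigned long pages = 0;

	for_each_vma_range(vmi, vma, ceiling)
		pages += vma_pages(vma);

	return pages;
}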

include/linux/mm_types.h

@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -486,6 +487,7 @@ struct kioctx_table;
struct mm_struct {
struct {
struct vm_area_struct *mmap; /* list of VMAs */
struct maple_tree mm_mt;
struct rb_root mm_rb;
u64 vmacache_seqnum; /* per-thread vmacache */
#ifdef CONFIG_MMU
@@ -697,6 +699,7 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */
@@ -774,6 +777,27 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
#endif /* CONFIG_LRU_GEN */
struct vma_iterator {
struct ma_state mas;
};
#define VMA_ITERATOR(name, __mm, __addr) \
struct vma_iterator name = { \
.mas = { \
.tree = &(__mm)->mm_mt, \
.index = __addr, \
.node = MAS_START, \
}, \
}
static inline void vma_iter_init(struct vma_iterator *vmi,
struct mm_struct *mm, unsigned long addr)
{
vmi->mas.tree = &mm->mm_mt;
vmi->mas.index = addr;
vmi->mas.node = MAS_START;
}
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
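
VMA_ITERATOR() builds the iterator around an embedded ma_state pointing at mm->mm_mt, and vma_iter_init() re-points an existing iterator at a new address by resetting the maple state to MAS_START. A small hypothetical example of both (not in the diff):

#include <linux/mm.h>
#include <linux/mm_types.h>

/* Hypothetical: return the first VMA at or above 'addr'. */
static struct vm_area_struct *demo_first_vma_from(struct mm_struct *mm,
						  unsigned long addr)
{
	VMA_ITERATOR(vmi, mm, 0);	/* declared starting at offset 0 ... */

	vma_iter_init(&vmi, mm, addr);	/* ... then restarted at addr */
	return vma_next(&vmi);		/* caller holds mmap_lock */
}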

include/trace/events/mmap.h

@@ -42,6 +42,79 @@ TRACE_EVENT(vm_unmapped_area,
__entry->low_limit, __entry->high_limit, __entry->align_mask,
__entry->align_offset)
);
TRACE_EVENT(vma_mas_szero,
TP_PROTO(struct maple_tree *mt, unsigned long start,
unsigned long end),
TP_ARGS(mt, start, end),
TP_STRUCT__entry(
__field(struct maple_tree *, mt)
__field(unsigned long, start)
__field(unsigned long, end)
),
TP_fast_assign(
__entry->mt = mt;
__entry->start = start;
__entry->end = end;
),
TP_printk("mt_mod %p, (NULL), SNULL, %lu, %lu,",
__entry->mt,
(unsigned long) __entry->start,
(unsigned long) __entry->end
)
);
TRACE_EVENT(vma_store,
TP_PROTO(struct maple_tree *mt, struct vm_area_struct *vma),
TP_ARGS(mt, vma),
TP_STRUCT__entry(
__field(struct maple_tree *, mt)
__field(struct vm_area_struct *, vma)
__field(unsigned long, vm_start)
__field(unsigned long, vm_end)
),
TP_fast_assign(
__entry->mt = mt;
__entry->vma = vma;
__entry->vm_start = vma->vm_start;
__entry->vm_end = vma->vm_end - 1;
),
TP_printk("mt_mod %p, (%p), STORE, %lu, %lu,",
__entry->mt, __entry->vma,
(unsigned long) __entry->vm_start,
(unsigned long) __entry->vm_end
)
);
TRACE_EVENT(exit_mmap,
TP_PROTO(struct mm_struct *mm),
TP_ARGS(mm),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(struct maple_tree *, mt)
),
TP_fast_assign(
__entry->mm = mm;
__entry->mt = &mm->mm_mt;
),
TP_printk("mt_mod %p, DESTROY\n",
__entry->mt
)
);
#endif
/* This part must be outside protection */
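
The three new tracepoints mirror maple-tree operations on mm_mt: a range being zeroed, a VMA being stored, and the tree being torn down at exit_mmap() time. Their real call sites live mostly in the mm/mmap.c hunk that is suppressed later in this diff; as a hedged sketch, a store helper would be expected to fire trace_vma_store() just before writing the VMA's range:

#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <trace/events/mmap.h>

/* Illustrative store path emitting the new tracepoint (not from the diff). */
static void demo_store_vma(struct ma_state *mas, struct vm_area_struct *vma)
{
	trace_vma_store(mas->tree, vma);
	mas_set_range(mas, vma->vm_start, vma->vm_end - 1);
	mas_store_prealloc(mas, vma);	/* nodes preallocated by the caller */
}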

kernel/fork.c

@@ -591,8 +591,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
unsigned long charge = 0;
LIST_HEAD(uf);
MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
MA_STATE(mas, &mm->mm_mt, 0, 0);
uprobe_start_dup_mmap();
if (mmap_write_lock_killable(oldmm)) {
@@ -622,8 +624,17 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
goto out;
khugepaged_fork(mm, oldmm);
retval = mas_expected_entries(&mas, oldmm->map_count);
if (retval)
goto out;
prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
retval = mas_expected_entries(&mas, oldmm->map_count);
if (retval)
goto out;
mas_for_each(&old_mas, mpnt, ULONG_MAX) {
struct file *file;
if (mpnt->vm_flags & VM_DONTCOPY) {
@@ -637,7 +648,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
*/
if (fatal_signal_pending(current)) {
retval = -EINTR;
goto out;
goto loop_out;
}
if (mpnt->vm_flags & VM_ACCOUNT) {
unsigned long len = vma_pages(mpnt);
@@ -702,6 +713,13 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
rb_link = &tmp->vm_rb.rb_right;
rb_parent = &tmp->vm_rb;
/* Link the vma into the MT */
mas.index = tmp->vm_start;
mas.last = tmp->vm_end - 1;
mas_store(&mas, tmp);
if (mas_is_err(&mas))
goto fail_nomem_mas_store;
mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
retval = copy_page_range(tmp, mpnt);
@@ -710,10 +728,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
tmp->vm_ops->open(tmp);
if (retval)
goto out;
goto loop_out;
}
/* a new mm has just been created */
retval = arch_dup_mmap(oldmm, mm);
loop_out:
mas_destroy(&mas);
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);
@@ -722,6 +742,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
fail_uprobe_end:
uprobe_end_dup_mmap();
return retval;
fail_nomem_mas_store:
unlink_anon_vmas(tmp);
fail_nomem_anon_vma_fork:
mpol_put(vma_policy(tmp));
fail_nomem_policy:
@@ -729,7 +752,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
fail_nomem:
retval = -ENOMEM;
vm_unacct_memory(charge);
goto out;
goto loop_out;
}
static inline int mm_alloc_pgd(struct mm_struct *mm)
@@ -1122,6 +1145,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
mm->vmacache_seqnum = 0;
atomic_set(&mm->mm_users, 1);
atomic_set(&mm->mm_count, 1);
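
dup_mmap() now pre-reserves maple-tree nodes for the expected number of VMAs with mas_expected_entries(), stores each copied VMA by setting mas.index/mas.last, exits through loop_out so mas_destroy() can release unused preallocations, and turns a failed store into -ENOMEM via the new fail_nomem_mas_store label. The same bulk-insert pattern in isolation, as a hypothetical helper with locking and GFP details elided:

#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/mm.h>

/* Hypothetical: copy every VMA pointer of 'oldmm' into 'new_mt'. */
static int demo_copy_vma_tree(struct maple_tree *new_mt, struct mm_struct *oldmm)
{
	MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
	MA_STATE(mas, new_mt, 0, 0);
	struct vm_area_struct *vma;
	int ret;

	ret = mas_expected_entries(&mas, oldmm->map_count);
	if (ret)
		return ret;

	mas_for_each(&old_mas, vma, ULONG_MAX) {
		mas.index = vma->vm_start;
		mas.last = vma->vm_end - 1;
		mas_store(&mas, vma);
		if (mas_is_err(&mas)) {
			ret = xa_err(mas.node);
			break;
		}
	}

	mas_destroy(&mas);	/* release any unused preallocated nodes */
	return ret;
}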

mm/damon/vaddr-test.h

@@ -14,33 +14,19 @@
#include <kunit/test.h>
static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas)
static void __link_vmas(struct maple_tree *mt, struct vm_area_struct *vmas,
ssize_t nr_vmas)
{
int i, j;
unsigned long largest_gap, gap;
int i;
MA_STATE(mas, mt, 0, 0);
if (!nr_vmas)
return;
for (i = 0; i < nr_vmas - 1; i++) {
vmas[i].vm_next = &vmas[i + 1];
vmas[i].vm_rb.rb_left = NULL;
vmas[i].vm_rb.rb_right = &vmas[i + 1].vm_rb;
largest_gap = 0;
for (j = i; j < nr_vmas; j++) {
if (j == 0)
continue;
gap = vmas[j].vm_start - vmas[j - 1].vm_end;
if (gap > largest_gap)
largest_gap = gap;
}
vmas[i].rb_subtree_gap = largest_gap;
}
vmas[i].vm_next = NULL;
vmas[i].vm_rb.rb_right = NULL;
vmas[i].rb_subtree_gap = 0;
mas_lock(&mas);
for (i = 0; i < nr_vmas; i++)
vma_mas_store(&vmas[i], &mas);
mas_unlock(&mas);
}
/*
@@ -72,6 +58,7 @@ static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas)
*/
static void damon_test_three_regions_in_vmas(struct kunit *test)
{
static struct mm_struct mm;
struct damon_addr_range regions[3] = {0,};
/* 10-20-25, 200-210-220, 300-305, 307-330 */
struct vm_area_struct vmas[] = {
@@ -83,9 +70,10 @@ static void damon_test_three_regions_in_vmas(struct kunit *test)
(struct vm_area_struct) {.vm_start = 307, .vm_end = 330},
};
__link_vmas(vmas, 6);
mt_init_flags(&mm.mm_mt, MM_MT_FLAGS);
__link_vmas(&mm.mm_mt, vmas, ARRAY_SIZE(vmas));
__damon_va_three_regions(&vmas[0], regions);
__damon_va_three_regions(&mm, regions);
KUNIT_EXPECT_EQ(test, 10ul, regions[0].start);
KUNIT_EXPECT_EQ(test, 25ul, regions[0].end);

mm/damon/vaddr.c

@@ -113,37 +113,38 @@ static unsigned long sz_range(struct damon_addr_range *r)
*
* Returns 0 if success, or negative error code otherwise.
*/
static int __damon_va_three_regions(struct vm_area_struct *vma,
static int __damon_va_three_regions(struct mm_struct *mm,
struct damon_addr_range regions[3])
{
struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0};
struct vm_area_struct *last_vma = NULL;
unsigned long start = 0;
struct rb_root rbroot;
struct damon_addr_range first_gap = {0}, second_gap = {0};
VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma, *prev = NULL;
unsigned long start;
/* Find two biggest gaps so that first_gap > second_gap > others */
for (; vma; vma = vma->vm_next) {
if (!last_vma) {
/*
* Find the two biggest gaps so that first_gap > second_gap > others.
* If this is too slow, it can be optimised to examine the maple
* tree gaps.
*/
for_each_vma(vmi, vma) {
unsigned long gap;
if (!prev) {
start = vma->vm_start;
goto next;
}
gap = vma->vm_start - prev->vm_end;
if (vma->rb_subtree_gap <= sz_range(&second_gap)) {
rbroot.rb_node = &vma->vm_rb;
vma = rb_entry(rb_last(&rbroot),
struct vm_area_struct, vm_rb);
goto next;
}
gap.start = last_vma->vm_end;
gap.end = vma->vm_start;
if (sz_range(&gap) > sz_range(&second_gap)) {
swap(gap, second_gap);
if (sz_range(&second_gap) > sz_range(&first_gap))
swap(second_gap, first_gap);
if (gap > sz_range(&first_gap)) {
second_gap = first_gap;
first_gap.start = prev->vm_end;
first_gap.end = vma->vm_start;
} else if (gap > sz_range(&second_gap)) {
second_gap.start = prev->vm_end;
second_gap.end = vma->vm_start;
}
next:
last_vma = vma;
prev = vma;
}
if (!sz_range(&second_gap) || !sz_range(&first_gap))
@@ -159,7 +160,7 @@ static int __damon_va_three_regions(struct vm_area_struct *vma,
regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION);
regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION);
regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION);
regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION);
regions[2].end = ALIGN(prev->vm_end, DAMON_MIN_REGION);
return 0;
}
@@ -180,7 +181,7 @@ static int damon_va_three_regions(struct damon_target *t,
return -EINVAL;
mmap_read_lock(mm);
rc = __damon_va_three_regions(mm->mmap, regions);
rc = __damon_va_three_regions(mm, regions);
mmap_read_unlock(mm);
mmput(mm);
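
The rewritten __damon_va_three_regions() no longer peeks at rb_subtree_gap; it walks the VMAs in address order, keeps the two largest gaps between neighbours, and carves three regions around them. The gap-tracking step in isolation, over a plain sorted array of ranges (illustrative only, not kernel code):

/* Illustrative: keep the two largest gaps between sorted [start, end) ranges. */
struct demo_range { unsigned long start, end; };

static void demo_two_largest_gaps(const struct demo_range *r, int n,
				  struct demo_range *first,
				  struct demo_range *second)
{
	int i;

	first->start = first->end = 0;
	second->start = second->end = 0;

	for (i = 1; i < n; i++) {
		unsigned long gap = r[i].start - r[i - 1].end;

		if (gap > first->end - first->start) {
			*second = *first;	/* old largest becomes runner-up */
			first->start = r[i - 1].end;
			first->end = r[i].start;
		} else if (gap > second->end - second->start) {
			second->start = r[i - 1].end;
			second->end = r[i].start;
		}
	}
}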

mm/init-mm.c

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm_types.h>
#include <linux/rbtree.h>
#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/list.h>
@@ -29,6 +30,7 @@
*/
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.mm_mt = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock),
.pgd = swapper_pg_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),

mm/mmap.c

(diff suppressed because it is too large: 611 changed lines)

mm/nommu.c

@@ -545,6 +545,19 @@ static void put_nommu_region(struct vm_region *region)
__put_nommu_region(region);
}
void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
{
mas_set_range(mas, vma->vm_start, vma->vm_end - 1);
mas_store_prealloc(mas, vma);
}
void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
{
mas->index = vma->vm_start;
mas->last = vma->vm_end - 1;
mas_store_prealloc(mas, NULL);
}
/*
* add a VMA into a process's mm_struct in the appropriate place in the list
* and tree and add to the address space's page tree also if not an anonymous
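
vma_mas_store() and vma_mas_remove() both end in mas_store_prealloc(), so callers are expected to have preallocated maple-tree nodes beforehand and the store itself cannot fail. A hedged sketch of such a caller (hypothetical; mas_preallocate()'s three-argument signature is assumed as of this series):

#include <linux/maple_tree.h>
#include <linux/mm.h>

/* Hypothetical caller: reserve nodes, then do the infallible store. */
static int demo_add_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);

	if (mas_preallocate(&mas, vma, GFP_KERNEL))
		return -ENOMEM;

	vma_mas_store(vma, &mas);	/* stores the range [vm_start, vm_end - 1] */
	return 0;
}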