UPSTREAM: mm: add per-VMA lock and helper functions to control it
Introduce per-VMA locking. The lock implementation relies on per-vma
and per-mm sequence counters to note exclusive locking:

  - read lock (implemented by vma_start_read) requires the vma
    (vm_lock_seq) and mm (mm_lock_seq) sequence counters to differ.
    If they match then there must be a vma exclusive lock held
    somewhere.
  - read unlock (implemented by vma_end_read) is a trivial vma->lock
    unlock.
  - write lock (vma_start_write) requires the mmap_lock to be held
    exclusively; the current mm counter is assigned to the vma counter.
    This allows multiple vmas to be locked under a single mmap_lock
    write lock (e.g. during vma merging). The vma counter is modified
    under exclusive vma lock.
  - write unlock (vma_end_write_all) is a batch release of all vma
    locks held. It doesn't pair with a specific vma_start_write! It is
    done before the exclusive mmap_lock is released, by incrementing
    the mm sequence counter (mm_lock_seq).
  - write downgrade - if the mmap_lock is downgraded to the read lock,
    all vma write locks are released as well (effectively the same as
    write unlock).

Link: https://lkml.kernel.org/r/20230227173632.3292573-13-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 5e31275cc997f8ec5d9e8d65fe9840ebed89db19)
Bug: 161210518
Change-Id: I5e0db53a4b5562e59dd031fabbae4f97acc1bce1
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
parent 882c3512ec
commit a9ea3113d4
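
To make the sequence-counter protocol in the message above concrete, here is a
minimal commented walkthrough in C; the counter values are illustrative and
assume a freshly initialized mm (mm_lock_seq == 0; vma_init_lock() starts
vm_lock_seq at -1):

	mmap_write_lock(mm);		/* mm->mm_lock_seq == 0 */
	vma_start_write(vma);		/* vma->vm_lock_seq = 0: counters match, vma is write-locked */
	/* vma_start_read(vma) now fails because the counters are equal */
	mmap_write_unlock(mm);		/* vma_end_write_all(): mm->mm_lock_seq becomes 1 */
	/* 0 != 1: every vma write lock taken above is released in one step */

	if (vma_start_read(vma)) {	/* succeeds again: counters differ */
		/* ... read-side work under the per-VMA lock ... */
		vma_end_read(vma);
	}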
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -624,6 +624,87 @@ struct vm_operations_struct {
 	ANDROID_KABI_RESERVE(4);
 };
 
+#ifdef CONFIG_PER_VMA_LOCK
+static inline void vma_init_lock(struct vm_area_struct *vma)
+{
+	init_rwsem(&vma->lock);
+	vma->vm_lock_seq = -1;
+}
+
+/*
+ * Try to read-lock a vma. The function is allowed to occasionally yield false
+ * locked result to avoid performance overhead, in which case we fall back to
+ * using mmap_lock. The function should never yield false unlocked result.
+ */
+static inline bool vma_start_read(struct vm_area_struct *vma)
+{
+	/* Check before locking. A race might cause false locked result. */
+	if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
+		return false;
+
+	if (unlikely(down_read_trylock(&vma->lock) == 0))
+		return false;
+
+	/*
+	 * Overflow might produce false locked result.
+	 * False unlocked result is impossible because we modify and check
+	 * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq
+	 * modification invalidates all existing locks.
+	 */
+	if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
+		up_read(&vma->lock);
+		return false;
+	}
+	return true;
+}
+
+static inline void vma_end_read(struct vm_area_struct *vma)
+{
+	rcu_read_lock(); /* keeps vma alive till the end of up_read */
+	up_read(&vma->lock);
+	rcu_read_unlock();
+}
+
+static inline void vma_start_write(struct vm_area_struct *vma)
+{
+	int mm_lock_seq;
+
+	mmap_assert_write_locked(vma->vm_mm);
+
+	/*
+	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
+	 * mm->mm_lock_seq can't be concurrently modified.
+	 */
+	mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
+	if (vma->vm_lock_seq == mm_lock_seq)
+		return;
+
+	down_write(&vma->lock);
+	vma->vm_lock_seq = mm_lock_seq;
+	up_write(&vma->lock);
+}
+
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+{
+	mmap_assert_write_locked(vma->vm_mm);
+	/*
+	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
+	 * mm->mm_lock_seq can't be concurrently modified.
+	 */
+	VM_BUG_ON_VMA(vma->vm_lock_seq != READ_ONCE(vma->vm_mm->mm_lock_seq), vma);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline void vma_init_lock(struct vm_area_struct *vma) {}
+static inline bool vma_start_read(struct vm_area_struct *vma)
+		{ return false; }
+static inline void vma_end_read(struct vm_area_struct *vma) {}
+static inline void vma_start_write(struct vm_area_struct *vma) {}
+static inline void vma_assert_write_locked(struct vm_area_struct *vma) {}
+
+#endif /* CONFIG_PER_VMA_LOCK */
+
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
@@ -632,6 +713,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
+	vma_init_lock(vma);
 }
 
 /* Use when VMA is not part of the VMA tree and needs no locking */
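
The comment above vma_start_read() describes the expected calling pattern. A
minimal sketch of a fault-path caller follows; the lookup step and
handle_pf_locked() are hypothetical placeholders (this patch adds only the
lock helpers, not their callers):

	struct vm_area_struct *vma;
	vm_fault_t ret;

	vma = find_vma(mm, address);			/* placeholder lookup */
	if (vma && vma_start_read(vma)) {
		/* fast path: per-VMA read lock held, mmap_lock not taken */
		ret = handle_pf_locked(vma, address);	/* hypothetical */
		vma_end_read(vma);
	} else {
		/* a false "locked" result is allowed: fall back to mmap_lock */
		mmap_read_lock(mm);
		vma = find_vma(mm, address);
		ret = vma ? handle_pf_locked(vma, address) : VM_FAULT_SIGSEGV;
		mmap_read_unlock(mm);
	}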
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -473,6 +473,11 @@ struct vm_area_struct {
 		vm_flags_t __private __vm_flags;
 	};
 
+#ifdef CONFIG_PER_VMA_LOCK
+	int vm_lock_seq;
+	struct rw_semaphore lock;
+#endif
+
 	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree.
@@ -605,6 +610,9 @@ struct mm_struct {
 					  * init_mm.mmlist, and are protected
 					  * by mmlist_lock
 					  */
+#ifdef CONFIG_PER_VMA_LOCK
+		int mm_lock_seq;
+#endif
 
 
 		unsigned long hiwater_rss; /* High-watermark of RSS usage */
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -72,6 +72,17 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
 	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
 }
 
+#ifdef CONFIG_PER_VMA_LOCK
+static inline void vma_end_write_all(struct mm_struct *mm)
+{
+	mmap_assert_write_locked(mm);
+	/* No races during update due to exclusive mmap_lock being held */
+	WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1);
+}
+#else
+static inline void vma_end_write_all(struct mm_struct *mm) {}
+#endif
+
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
 	init_rwsem(&mm->mmap_lock);
@@ -114,12 +125,14 @@ static inline bool mmap_write_trylock(struct mm_struct *mm)
 static inline void mmap_write_unlock(struct mm_struct *mm)
 {
 	__mmap_lock_trace_released(mm, true);
+	vma_end_write_all(mm);
 	up_write(&mm->mmap_lock);
 }
 
 static inline void mmap_write_downgrade(struct mm_struct *mm)
 {
 	__mmap_lock_trace_acquire_returned(mm, false, true);
+	vma_end_write_all(mm);
 	downgrade_write(&mm->mmap_lock);
 }
 
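
Since vma_end_write_all() is called from mmap_write_unlock() and
mmap_write_downgrade() above, write locks batch naturally across VMAs. A
sketch, assuming prev/vma/next are adjacent VMAs being merged (the merge
logic itself lives elsewhere):

	mmap_write_lock(mm);
	vma_start_write(prev);		/* each call takes vma->lock only briefly */
	vma_start_write(vma);
	vma_start_write(next);
	/* ... merge/adjust the three VMAs ... */
	mmap_write_unlock(mm);		/* one mm_lock_seq increment releases all three */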
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -481,6 +481,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
 		 */
 		data_race(memcpy(new, orig, sizeof(*new)));
 		INIT_LIST_HEAD(&new->anon_vma_chain);
+		vma_init_lock(new);
 		dup_anon_vma_name(orig, new);
 	}
 	return new;
@@ -1151,6 +1152,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	seqcount_init(&mm->write_protect_seq);
 	mmap_init_lock(mm);
 	INIT_LIST_HEAD(&mm->mmlist);
+#ifdef CONFIG_PER_VMA_LOCK
+	mm->mm_lock_seq = 0;
+#endif
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -37,6 +37,9 @@ struct mm_struct init_mm = {
 	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
 	.arg_lock	= __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
+#ifdef CONFIG_PER_VMA_LOCK
+	.mm_lock_seq	= 0,
+#endif
 	.user_ns	= &init_user_ns,
 	.cpu_bitmap	= CPU_BITS_NONE,
 #ifdef CONFIG_IOMMU_SVA