ANDROID: KVM: arm64: Have different callbacks for PTE manipulation

Move the host-specific PTE reference counting code out of the
page-table code and define a new structure that wraps all the PTE
manipulation callbacks. This structure is passed when the page-table
code is initialized and allows different callbacks to be registered
for the host and for guests.

Bug: 222044487
Signed-off-by: Sebastian Ene <sebastianene@google.com>
Change-Id: I116e8322935762df2f2be6e8d51a3f0c140b3d36
Signed-off-by: Quentin Perret <qperret@google.com>
Sebastian Ene 2022-11-14 17:02:17 +00:00 committed by Quentin Perret
parent c4714ab75d
commit d096d35445
4 changed files with 92 additions and 34 deletions
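A minimal usage sketch (editor's illustration, not part of the patch): how a
stage-2 user now bundles its callbacks into a kvm_pgtable_pte_ops instance and
hands it to the page-table code at init time. The my_* names are hypothetical;
the types, signatures and the 4-argument kvm_pgtable_stage2_init() macro come
from the hunks below.

static bool my_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/* Always force page-granularity mappings for this range. */
	return true;
}

static bool my_pte_is_counted_cb(kvm_pte_t pte, u32 level)
{
	/* Refcount every non-zero descriptor, valid or owner-annotated. */
	return !!pte;
}

static struct kvm_pgtable_pte_ops my_pte_ops = {
	.force_pte_cb		= my_force_pte_cb,
	.pte_is_counted_cb	= my_pte_is_counted_cb,
};

static int my_prepare_stage2(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	/* pte_ops replaces the old force_pte_cb argument at init time. */
	return kvm_pgtable_stage2_init(pgt, mmu, mm_ops, &my_pte_ops);
}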

arch/arm64/include/asm/kvm_pgtable.h

@ -200,6 +200,22 @@ enum kvm_pgtable_prot {
typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
enum kvm_pgtable_prot prot);
typedef bool (*kvm_pgtable_pte_is_counted_cb_t)(kvm_pte_t pte, u32 level);
/**
* struct kvm_pgtable_pte_ops - PTE callbacks.
* @force_pte_cb:	Force the mapping granularity to pages and
*			return true if this is supported instead of
*			block mappings.
* @pte_is_counted_cb:	Verify the attributes of the @pte argument
*			and return true if the descriptor needs to be
*			refcounted, otherwise return false.
*/
struct kvm_pgtable_pte_ops {
kvm_pgtable_force_pte_cb_t force_pte_cb;
kvm_pgtable_pte_is_counted_cb_t pte_is_counted_cb;
};
/**
* struct kvm_pgtable - KVM page-table.
* @ia_bits: Maximum input address size, in bits.
@ -208,8 +224,7 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
* @mm_ops: Memory management callbacks.
* @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
* @flags: Stage-2 page-table flags.
* @force_pte_cb: Function that returns true if page level mappings must
*		be used instead of block mappings.
* @pte_ops: PTE callbacks.
*/
struct kvm_pgtable {
u32 ia_bits;
@ -220,7 +235,7 @@ struct kvm_pgtable {
/* Stage-2 only */
struct kvm_s2_mmu *mmu;
enum kvm_pgtable_stage2_flags flags;
kvm_pgtable_force_pte_cb_t force_pte_cb;
struct kvm_pgtable_pte_ops *pte_ops;
};
/**
@ -349,18 +364,17 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
* @mmu: S2 MMU context for this S2 translation
* @mm_ops: Memory management callbacks.
* @flags: Stage-2 configuration flags.
* @force_pte_cb: Function that returns true if page level mappings must
*		be used instead of block mappings.
* @pte_ops: PTE callbacks.
*
* Return: 0 on success, negative error code on failure.
*/
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
struct kvm_pgtable_pte_ops *pte_ops);
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops, pte_ops) \
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, pte_ops)
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.

arch/arm64/kvm/hyp/nvhe/mem_protect.c

@ -30,6 +30,18 @@ static struct hyp_pool host_s2_pool;
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))
static struct kvm_pgtable_pte_ops host_s2_pte_ops;
static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot);
static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level);
static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
enum kvm_pgtable_prot prot);
static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level);
static struct kvm_pgtable_pte_ops guest_s2_pte_ops = {
.force_pte_cb = guest_stage2_force_pte_cb,
.pte_is_counted_cb = guest_stage2_pte_is_counted
};
static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
hyp_spin_lock(&vm->lock);
@ -129,8 +141,6 @@ static void prepare_host_vtcr(void)
id_aa64mmfr1_el1_sys_val, phys_shift);
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
@ -144,9 +154,12 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
if (ret)
return ret;
host_s2_pte_ops.force_pte_cb = host_stage2_force_pte;
host_s2_pte_ops.pte_is_counted_cb = host_stage2_pte_is_counted;
ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
host_stage2_force_pte_cb);
&host_s2_pte_ops);
if (ret)
return ret;
@ -163,6 +176,11 @@ static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
return true;
}
static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level)
{
return host_stage2_pte_is_counted(pte, level);
}
static void *guest_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
@ -252,7 +270,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
guest_lock_component(vm);
ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
guest_stage2_force_pte_cb);
&guest_s2_pte_ops);
guest_unlock_component(vm);
if (ret)
return ret;
@ -618,7 +636,7 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, enum pkvm_component
return 0;
}
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
static bool host_stage2_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
/*
* Block mappings must be used with care in the host stage-2 as a
@ -640,6 +658,16 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr
return prot != PKVM_HOST_MMIO_PROT;
}
static bool host_stage2_pte_is_counted(kvm_pte_t pte, u32 level)
{
/*
* The refcount tracks valid entries as well as invalid entries if they
* encode ownership of a page to another entity than the page-table
* owner, whose id is 0.
*/
return !!pte;
}
static int host_stage2_idmap(u64 addr)
{
struct kvm_mem_range range;

arch/arm64/kvm/hyp/pgtable.c

@ -482,7 +482,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
pgt->mm_ops = mm_ops;
pgt->mmu = NULL;
pgt->force_pte_cb = NULL;
pgt->pte_ops = NULL;
return 0;
}
@ -627,16 +627,6 @@ static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
}
static bool stage2_pte_is_counted(kvm_pte_t pte)
{
/*
* The refcount tracks valid entries as well as invalid entries if they
* encode ownership of a page to another entity than the page-table
* owner, whose id is 0.
*/
return !!pte;
}
static void stage2_clear_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
u32 level)
{
@ -685,6 +675,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t new, old = *ptep;
u64 granule = kvm_granule_size(level), phys = data->phys;
struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
if (!stage2_leaf_mapping_allowed(addr, end, level, data))
@ -695,7 +686,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
else
new = data->annotation;
if (stage2_pte_is_counted(old)) {
if (pte_ops->pte_is_counted_cb(old, level)) {
/*
* Skip updating the PTE if we are trying to recreate the exact
* same mapping or only change the access permissions. Instead,
@ -722,7 +713,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
if (stage2_pte_is_counted(new))
if (pte_ops->pte_is_counted_cb(new, level))
mm_ops->get_page(ptep);
out_set_pte:
@ -759,11 +750,13 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct stage2_map_data *data)
{
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
struct kvm_pgtable *pgt = data->mmu->pgt;
struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
kvm_pte_t *childp, pte = *ptep;
int ret;
if (data->anchor) {
if (stage2_pte_is_counted(pte))
if (pte_ops->pte_is_counted_cb(pte, level))
mm_ops->put_page(ptep);
return 0;
@ -788,7 +781,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
* a table. Accesses beyond 'end' that fall within the new table
* will be mapped lazily.
*/
if (stage2_pte_is_counted(pte))
if (pte_ops->pte_is_counted_cb(pte, level))
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
kvm_set_table_pte(ptep, childp, mm_ops);
@ -864,12 +857,12 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
void *mc)
{
int ret;
struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
struct stage2_map_data map_data = {
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
.mmu = pgt->mmu,
.memcache = mc,
.mm_ops = pgt->mm_ops,
.force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
};
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
@ -879,6 +872,9 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
.arg = &map_data,
};
if (pte_ops->force_pte_cb)
map_data.force_pte = pte_ops->force_pte_cb(addr, addr + size, prot);
if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
return -EINVAL;
@ -925,11 +921,12 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_s2_mmu *mmu = pgt->mmu;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
kvm_pte_t pte = *ptep, *childp = NULL;
bool need_flush = false;
if (!kvm_pte_valid(pte)) {
if (stage2_pte_is_counted(pte)) {
if (pte_ops->pte_is_counted_cb(pte, level)) {
kvm_clear_pte(ptep);
mm_ops->put_page(ptep);
}
@ -1144,7 +1141,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb)
struct kvm_pgtable_pte_ops *pte_ops)
{
size_t pgd_sz;
u64 vtcr = mmu->arch->vtcr;
@ -1162,7 +1159,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
pgt->mm_ops = mm_ops;
pgt->mmu = mmu;
pgt->flags = flags;
pgt->force_pte_cb = force_pte_cb;
pgt->pte_ops = pte_ops;
/* Ensure zeroed PGD pages are visible to the hardware walker */
dsb(ishst);
@ -1184,9 +1181,10 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
{
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
kvm_pte_t pte = *ptep;
if (!stage2_pte_is_counted(pte))
if (!pte_ops->pte_is_counted_cb(pte, level))
return 0;
mm_ops->put_page(ptep);

arch/arm64/kvm/mmu.c

@ -680,6 +680,17 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}
static bool stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
return true;
}
static bool stage2_pte_is_counted(kvm_pte_t pte, u32 level)
{
return !!pte;
}
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.zalloc_page = stage2_memcache_zalloc_page,
.zalloc_pages_exact = kvm_s2_zalloc_pages_exact,
@ -693,6 +704,12 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.icache_inval_pou = invalidate_icache_guest_page,
};
static struct kvm_pgtable_pte_ops kvm_s2_pte_ops = {
.force_pte_cb = stage2_force_pte_cb,
.pte_is_counted_cb = stage2_pte_is_counted
};
/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
@ -746,7 +763,8 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return -ENOMEM;
mmu->arch = &kvm->arch;
err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops,
&kvm_s2_pte_ops);
if (err)
goto out_free_pgtable;