KVM: MMU: Reinstate pte prefetch on invlpg
Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races. However, the SDM is adamant that prefetch is allowed:

  "The processor may create entries in paging-structure caches for
   translations required for prefetches and for accesses that are a
   result of speculative execution that would never actually occur in
   the executed code path."

And, in fact, there was a race in the prefetch code: we picked up the pte without the mmu lock held, so an older invlpg could install the pte over a newer invlpg.

Reinstate the prefetch logic, but this time note whether another invlpg has executed using a counter. If a race occurred, do not install the pte.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
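As a rough illustration of the pattern the patch relies on, the sketch below mimics it in plain user-space C: the invalidation path bumps a counter while holding the lock, and the prefetch path snapshots the counter before its unlocked work, then discards the sampled pte if the counter has moved by the time it reacquires the lock. All names here (mmu_lock, shadow_pte, install_pte) are stand-ins for illustration, not the KVM code itself.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_int invlpg_counter;   /* bumped on every invlpg */
    static uint64_t shadow_pte;         /* stand-in for one shadow pte */

    /* Invalidation path: zap the entry and bump the counter under the lock. */
    static void invlpg(void)
    {
            pthread_mutex_lock(&mmu_lock);
            shadow_pte = 0;
            atomic_fetch_add(&invlpg_counter, 1);
            pthread_mutex_unlock(&mmu_lock);
    }

    /*
     * Prefetch path: the guest pte was sampled outside the lock, together
     * with a snapshot of the counter.  If the counter has moved by the time
     * the lock is taken, a newer invlpg ran and the sampled pte is dropped.
     */
    static void install_pte(int snapshot, uint64_t sampled_pte)
    {
            pthread_mutex_lock(&mmu_lock);
            if (atomic_load(&invlpg_counter) != snapshot)
                    sampled_pte = 0;    /* lost the race: install nothing */
            shadow_pte = sampled_pte;
            pthread_mutex_unlock(&mmu_lock);
    }

    int main(void)
    {
            /* No race: the counter is unchanged, the pte is installed. */
            int snap = atomic_load(&invlpg_counter);
            install_pte(snap, 0x1234);
            printf("no race:   spte = %#llx\n", (unsigned long long)shadow_pte);

            /* Race: an invlpg runs after the snapshot, the stale pte is dropped. */
            snap = atomic_load(&invlpg_counter);
            invlpg();
            install_pte(snap, 0x5678);
            printf("with race: spte = %#llx\n", (unsigned long long)shadow_pte);
            return 0;
    }

In the patch itself the same roles are played by FNAME(invlpg), which does the atomic_inc() under mmu_lock, and kvm_mmu_pte_write(), which compares the counter before installing the prefetched gpte.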
arch/x86/include/asm/kvm_host.h
@@ -389,6 +389,7 @@ struct kvm_arch {
 	unsigned int n_free_mmu_pages;
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_alloc_mmu_pages;
+	atomic_t invlpg_counter;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
arch/x86/kvm/mmu.c
@@ -2613,9 +2613,30 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
+	int invlpg_counter;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
+	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
+
+	/*
+	 * Assume that the pte write on a page table of the same type
+	 * as the current vcpu paging mode.  This is nearly always true
+	 * (might be false while changing modes).  Note it is verified later
+	 * by update_pte().
+	 */
+	if ((is_pae(vcpu) && bytes == 4) || !new) {
+		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
+		if (is_pae(vcpu)) {
+			gpa &= ~(gpa_t)7;
+			bytes = 8;
+		}
+		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+		if (r)
+			gentry = 0;
+		new = (const u8 *)&gentry;
+	}
+
 	switch (bytes) {
 	case 4:
 		gentry = *(const u32 *)new;
@@ -2628,22 +2649,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}
 
-	/*
-	 * Assume that the pte write on a page table of the same type
-	 * as the current vcpu paging mode.  This is nearly always true
-	 * (might be false while changing modes).  Note it is verified later
-	 * by update_pte().
-	 */
-	if (is_pae(vcpu) && bytes == 4) {
-		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
-		if (r)
-			gentry = 0;
-	}
-
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
arch/x86/kvm/paging_tmpl.h
@@ -463,6 +463,7 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL  ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+			struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
+
+	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+
 	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (pte_gpa == -1)
+		return;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return;
+	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,