diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c89bf230af67..078a7f1ac34c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -240,11 +240,9 @@ static int is_rmap_pte(u64 pte) return is_shadow_present_pte(pte); } -static struct page *spte_to_page(u64 pte) +static pfn_t spte_to_pfn(u64 pte) { - hfn_t hfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; - - return pfn_to_page(hfn); + return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; } static gfn_t pse36_gfn_delta(u32 gpte) @@ -541,20 +539,20 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) struct kvm_rmap_desc *desc; struct kvm_rmap_desc *prev_desc; struct kvm_mmu_page *sp; - struct page *page; + pfn_t pfn; unsigned long *rmapp; int i; if (!is_rmap_pte(*spte)) return; sp = page_header(__pa(spte)); - page = spte_to_page(*spte); + pfn = spte_to_pfn(*spte); if (*spte & PT_ACCESSED_MASK) - mark_page_accessed(page); + kvm_set_pfn_accessed(pfn); if (is_writeble_pte(*spte)) - kvm_release_page_dirty(page); + kvm_release_pfn_dirty(pfn); else - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte)); if (!*rmapp) { printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); @@ -635,11 +633,11 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) spte = rmap_next(kvm, rmapp, spte); } if (write_protected) { - struct page *page; + pfn_t pfn; spte = rmap_next(kvm, rmapp, NULL); - page = spte_to_page(*spte); - SetPageDirty(page); + pfn = spte_to_pfn(*spte); + kvm_set_pfn_dirty(pfn); } /* check for huge page mappings */ @@ -1036,7 +1034,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int largepage, gfn_t gfn, - struct page *page, bool speculative) + pfn_t pfn, bool speculative) { u64 spte; int was_rmapped = 0; @@ -1058,10 +1056,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, child = page_header(pte & PT64_BASE_ADDR_MASK); mmu_page_remove_parent_pte(child, shadow_pte); - } else if (page != spte_to_page(*shadow_pte)) { + } else if (pfn != spte_to_pfn(*shadow_pte)) { pgprintk("hfn old %lx new %lx\n", - page_to_pfn(spte_to_page(*shadow_pte)), - page_to_pfn(page)); + spte_to_pfn(*shadow_pte), pfn); rmap_remove(vcpu->kvm, shadow_pte); } else { if (largepage) @@ -1090,7 +1087,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (largepage) spte |= PT_PAGE_SIZE_MASK; - spte |= page_to_phys(page); + spte |= (u64)pfn << PAGE_SHIFT; if ((pte_access & ACC_WRITE_MASK) || (write_fault && !is_write_protection(vcpu) && !user_fault)) { @@ -1135,12 +1132,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, if (!was_rmapped) { rmap_add(vcpu, shadow_pte, gfn, largepage); if (!is_rmap_pte(*shadow_pte)) - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); } else { if (was_writeble) - kvm_release_page_dirty(page); + kvm_release_pfn_dirty(pfn); else - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); } if (!ptwrite || !*ptwrite) vcpu->arch.last_pte_updated = shadow_pte; @@ -1151,7 +1148,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) } static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int largepage, gfn_t gfn, struct page *page, + int largepage, gfn_t gfn, pfn_t pfn, int level) { hpa_t table_addr = vcpu->arch.mmu.root_hpa; @@ -1166,13 +1163,13 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (level == 1) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 0, gfn, page, false); + 0, write, 1, &pt_write, 0, gfn, pfn, false); return pt_write; } if (largepage && level == 2) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 1, gfn, page, false); + 0, write, 1, &pt_write, 1, gfn, pfn, false); return pt_write; } @@ -1187,7 +1184,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, 1, ACC_ALL, &table[index]); if (!new_table) { pgprintk("nonpaging_map: ENOMEM\n"); - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); return -ENOMEM; } @@ -1202,8 +1199,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) { int r; int largepage = 0; - - struct page *page; + pfn_t pfn; down_read(¤t->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1211,18 +1207,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } - page = gfn_to_page(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); /* mmio */ - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); return 1; } spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); - r = __direct_map(vcpu, v, write, largepage, gfn, page, + r = __direct_map(vcpu, v, write, largepage, gfn, pfn, PT32E_ROOT_LEVEL); spin_unlock(&vcpu->kvm->mmu_lock); @@ -1355,7 +1351,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code) { - struct page *page; + pfn_t pfn; int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; @@ -1372,16 +1368,16 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } - page = gfn_to_page(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); return 1; } spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, - largepage, gfn, page, TDP_ROOT_LEVEL); + largepage, gfn, pfn, TDP_ROOT_LEVEL); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -1525,6 +1521,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) static int init_kvm_mmu(struct kvm_vcpu *vcpu) { + vcpu->arch.update_pte.pfn = bad_pfn; + if (tdp_enabled) return init_kvm_tdp_mmu(vcpu); else @@ -1644,7 +1642,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn; int r; u64 gpte = 0; - struct page *page; + pfn_t pfn; vcpu->arch.update_pte.largepage = 0; @@ -1680,15 +1678,15 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } - page = gfn_to_page(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn); up_read(¤t->mm->mmap_sem); - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); return; } vcpu->arch.update_pte.gfn = gfn; - vcpu->arch.update_pte.page = page; + vcpu->arch.update_pte.pfn = pfn; } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -1793,9 +1791,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, } kvm_mmu_audit(vcpu, "post pte write"); spin_unlock(&vcpu->kvm->mmu_lock); - if (vcpu->arch.update_pte.page) { - kvm_release_page_clean(vcpu->arch.update_pte.page); - vcpu->arch.update_pte.page = NULL; + if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { + kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); + vcpu->arch.update_pte.pfn = bad_pfn; } } @@ -2236,8 +2234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, audit_mappings_page(vcpu, ent, va, level - 1); } else { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); - struct page *page = gpa_to_page(vcpu, gpa); - hpa_t hpa = page_to_phys(page); + hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; if (is_shadow_present_pte(ent) && (ent & PT64_BASE_ADDR_MASK) != hpa) @@ -2250,7 +2247,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, && !is_error_hpa(hpa)) printk(KERN_ERR "audit: (%s) notrap shadow," " valid guest gva %lx\n", audit_msg, va); - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); } } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 57d872aec663..156fe10288ae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -247,7 +247,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, { pt_element_t gpte; unsigned pte_access; - struct page *npage; + pfn_t pfn; int largepage = vcpu->arch.update_pte.largepage; gpte = *(const pt_element_t *)pte; @@ -260,13 +260,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte); if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) return; - npage = vcpu->arch.update_pte.page; - if (!npage) + pfn = vcpu->arch.update_pte.pfn; + if (is_error_pfn(pfn)) return; - get_page(npage); + kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), - npage, true); + pfn, true); } /* @@ -275,7 +275,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *walker, int user_fault, int write_fault, int largepage, - int *ptwrite, struct page *page) + int *ptwrite, pfn_t pfn) { hpa_t shadow_addr; int level; @@ -336,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, walker->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != walker->ptes[level - 2]) { - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); return NULL; } } @@ -349,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, user_fault, write_fault, walker->ptes[walker->level-1] & PT_DIRTY_MASK, - ptwrite, largepage, walker->gfn, page, false); + ptwrite, largepage, walker->gfn, pfn, false); return shadow_ent; } @@ -378,7 +378,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u64 *shadow_pte; int write_pt = 0; int r; - struct page *page; + pfn_t pfn; int largepage = 0; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -413,20 +413,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } - page = gfn_to_page(vcpu->kvm, walker.gfn); + pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); up_read(¤t->mm->mmap_sem); /* mmio */ - if (is_error_page(page)) { + if (is_error_pfn(pfn)) { pgprintk("gfn %x is mmio\n", walker.gfn); - kvm_release_page_clean(page); + kvm_release_pfn_clean(pfn); return 1; } spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, - largepage, &write_pt, page); + largepage, &write_pt, pfn); pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, shadow_pte, *shadow_pte, write_pt); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index b9230490d777..de3eccfb767c 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -248,8 +248,8 @@ struct kvm_vcpu_arch { u64 *last_pte_updated; struct { - gfn_t gfn; /* presumed gfn during guest pte update */ - struct page *page; /* page corresponding to that gfn */ + gfn_t gfn; /* presumed gfn during guest pte update */ + pfn_t pfn; /* pfn corresponding to that gfn */ int largepage; } update_pte; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a2ceb51b4274..578c3638bbba 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -150,8 +150,10 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); extern struct page *bad_page; +extern pfn_t bad_pfn; int is_error_page(struct page *page); +int is_error_pfn(pfn_t pfn); int kvm_is_error_hva(unsigned long addr); int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, @@ -168,6 +170,16 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); +void kvm_set_page_dirty(struct page *page); +void kvm_set_page_accessed(struct page *page); + +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); +void kvm_release_pfn_dirty(pfn_t); +void kvm_release_pfn_clean(pfn_t pfn); +void kvm_set_pfn_dirty(pfn_t pfn); +void kvm_set_pfn_accessed(pfn_t pfn); +void kvm_get_pfn(pfn_t pfn); + int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 1c4e46decb22..9b6f395c9625 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -38,6 +38,8 @@ typedef unsigned long hva_t; typedef u64 hpa_t; typedef unsigned long hfn_t; +typedef hfn_t pfn_t; + struct kvm_pio_request { unsigned long count; int cur_count; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 93ed78b015c0..6a52c084e068 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -458,6 +459,12 @@ int is_error_page(struct page *page) } EXPORT_SYMBOL_GPL(is_error_page); +int is_error_pfn(pfn_t pfn) +{ + return pfn == bad_pfn; +} +EXPORT_SYMBOL_GPL(is_error_pfn); + static inline unsigned long bad_hva(void) { return PAGE_OFFSET; @@ -519,7 +526,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) /* * Requires current->mm->mmap_sem to be held */ -struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) { struct page *page[1]; unsigned long addr; @@ -530,7 +537,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) { get_page(bad_page); - return bad_page; + return page_to_pfn(bad_page); } npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, @@ -538,28 +545,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) if (npages != 1) { get_page(bad_page); - return bad_page; + return page_to_pfn(bad_page); } - return page[0]; + return page_to_pfn(page[0]); +} + +EXPORT_SYMBOL_GPL(gfn_to_pfn); + +struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +{ + return pfn_to_page(gfn_to_pfn(kvm, gfn)); } EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) { - put_page(page); + kvm_release_pfn_clean(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_clean); +void kvm_release_pfn_clean(pfn_t pfn) +{ + put_page(pfn_to_page(pfn)); +} +EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); + void kvm_release_page_dirty(struct page *page) { - if (!PageReserved(page)) - SetPageDirty(page); - put_page(page); + kvm_release_pfn_dirty(page_to_pfn(page)); } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); +void kvm_release_pfn_dirty(pfn_t pfn) +{ + kvm_set_pfn_dirty(pfn); + kvm_release_pfn_clean(pfn); +} +EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); + +void kvm_set_page_dirty(struct page *page) +{ + kvm_set_pfn_dirty(page_to_pfn(page)); +} +EXPORT_SYMBOL_GPL(kvm_set_page_dirty); + +void kvm_set_pfn_dirty(pfn_t pfn) +{ + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) + SetPageDirty(page); +} +EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); + +void kvm_set_pfn_accessed(pfn_t pfn) +{ + mark_page_accessed(pfn_to_page(pfn)); +} +EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); + +void kvm_get_pfn(pfn_t pfn) +{ + get_page(pfn_to_page(pfn)); +} +EXPORT_SYMBOL_GPL(kvm_get_pfn); + static int next_segment(unsigned long len, int offset) { if (len > PAGE_SIZE - offset) @@ -1351,6 +1402,7 @@ static struct sys_device kvm_sysdev = { }; struct page *bad_page; +pfn_t bad_pfn; static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) @@ -1392,6 +1444,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out; } + bad_pfn = page_to_pfn(bad_page); + r = kvm_arch_hardware_setup(); if (r < 0) goto out_free_0;