kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important, it can take kvm->mmu_lock for an extended period of time. Second, its user can actually see many false positives in some cases. The latter is due to a benign race like this:

  1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects them.
  2. The guest modifies the pages, causing them to be marked dirty.
  3. Userspace actually copies the pages.
  4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though they were not written to since (3).

This is especially a problem for large guests, where the time between (1) and (3) can be substantial. This patch introduces a new capability which, when enabled, makes KVM_GET_DIRTY_LOG not write-protect the pages it returns. Instead, userspace has to explicitly clear the dirty log bits just before using the content of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a 64-page granularity rather than requiring a full memslot to be synced; this way, the mmu_lock is taken for small amounts of time, and only a small amount of time passes between the write protection of pages and the sending of their content.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 8fe65a8299
commit 2a31b9db15
Documentation/virtual/kvm/api.txt

@@ -305,6 +305,9 @@ the address space for which you want to return the dirty bitmap.
 They must be less than the value that KVM_CHECK_EXTENSION returns for
 the KVM_CAP_MULTI_ADDRESS_SPACE capability.
 
+The bits in the dirty bitmap are cleared before the ioctl returns, unless
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is enabled.  For more information,
+see the description of the capability.
 
 4.9 KVM_SET_MEMORY_ALIAS
@@ -3758,6 +3761,46 @@ Coalesced pio is based on coalesced mmio. There is little difference
 between coalesced mmio and pio except that coalesced pio records accesses
 to I/O ports.
 
+4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
+
+Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clear_dirty_log (in)
+Returns: 0 on success, -1 on error
+
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+	__u32 slot;
+	__u32 num_pages;
+	__u64 first_page;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding;
+	};
+};
+
+The ioctl clears the dirty status of pages in a memory slot, according to
+the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
+field.  Bit 0 of the bitmap corresponds to page "first_page" in the
+memory slot, and num_pages is the size in bits of the input bitmap.
+Both first_page and num_pages must be a multiple of 64.  For each bit
+that is set in the input bitmap, the corresponding page is marked "clean"
+in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
+(for example via write-protection, or by clearing the dirty bit in
+a page table entry).
+
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of the slot field
+specify the address space for which you want to clear the dirty bitmap.
+They must be less than the value that KVM_CHECK_EXTENSION returns for
+the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+
+This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+is enabled; for more information, see the description of the capability.
+However, it can always be used as long as KVM_CHECK_EXTENSION confirms
+that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT is present.
+
 5. The kvm_run structure
 ------------------------
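To make the new API concrete, here is a minimal userspace sketch (an editorial illustration, not part of this patch). It assumes vm_fd is an open KVM VM file descriptor, that the capability checks described above have already been done, and that bitmap was filled in by a prior KVM_GET_DIRTY_LOG call:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Clear (and re-protect) the first 64 pages of memslot 0. */
    static int clear_first_pages(int vm_fd, uint64_t *bitmap)
    {
            struct kvm_clear_dirty_log clear = {
                    .slot = 0,              /* bits 16-31 select the address space */
                    .first_page = 0,        /* must be a multiple of 64 */
                    .num_pages = 64,        /* must be a multiple of 64 */
                    .dirty_bitmap = bitmap, /* bit 0 corresponds to first_page */
            };

            /* Each set bit marks the page clean and re-enables dirty tracking. */
            return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
    }

Userspace would typically issue this right before copying the corresponding pages, so that any write racing with the copy shows up again in the next KVM_GET_DIRTY_LOG pass.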
@@ -4652,6 +4695,30 @@ and injected exceptions.
 * For the new DR6 bits, note that bit 16 is set iff the #DB exception
   will clear DR6.RTM.
 
+7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
+
+Architectures: all
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
+clear and write-protect all pages that are returned as dirty.
+Rather, userspace will have to do this operation separately using
+KVM_CLEAR_DIRTY_LOG.
+
+At the cost of a slightly more complicated operation, this provides better
+scalability and responsiveness for two reasons.  First, the
+KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather
+than requiring a full memslot to be synced; this ensures that KVM does not
+take spinlocks for an extended period of time.  Second, in some cases a
+large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
+userspace actually using the data in the page.  Pages can be modified
+during this time, which is inefficient for both the guest and userspace:
+the guest will incur a higher penalty due to write protection faults,
+while userspace can see false reports of dirty pages.  Manual reprotection
+helps reduce this time, improving guest performance and reducing the
+number of dirty log false positives.
+
 8. Other capabilities.
 ----------------------
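For illustration, a sketch of how userspace would opt in (again editorial, not part of the patch; vm_fd is an assumed VM file descriptor, and KVM_CHECK_EXTENSION should have confirmed the capability first):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int enable_manual_protect(int vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT,
                    .args = { 1 },  /* args[0] = 1 enables, 0 disables */
            };

            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }

Once enabled, the per-iteration flow becomes: KVM_GET_DIRTY_LOG to fetch the bitmap (which no longer write-protects anything), copy out the dirty pages, then KVM_CLEAR_DIRTY_LOG on exactly the ranges that were copied.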
arch/mips/kvm/mips.c

@@ -1023,6 +1023,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return r;
 }
 
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	bool flush = false;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+	if (flush) {
+		slots = kvm_memslots(kvm);
+		memslot = id_to_memslot(slots, log->slot);
+
+		/* Let implementation handle TLB/GVA invalidation */
+		kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
+	}
+
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
 	long r;
arch/x86/kvm/x86.c

@@ -4418,6 +4418,33 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return r;
 }
 
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+	bool flush = false;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	/*
+	 * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+	 */
+	if (kvm_x86_ops->flush_log_dirty)
+		kvm_x86_ops->flush_log_dirty(kvm);
+
+	r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+	/*
+	 * All the TLBs can be flushed out of mmu lock, see the comments in
+	 * kvm_mmu_slot_remove_write_access().
+	 */
+	lockdep_assert_held(&kvm->slots_lock);
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			bool line_status)
 {
include/linux/kvm_host.h

@@ -449,6 +449,7 @@ struct kvm {
 #endif
 	long tlbs_dirty;
 	struct list_head devices;
+	bool manual_dirty_log_protect;
 	struct dentry *debugfs_dentry;
 	struct kvm_stat_data **debugfs_stat_data;
 	struct srcu_struct srcu;
@@ -754,6 +755,8 @@ int kvm_get_dirty_log(struct kvm *kvm,
 
 int kvm_get_dirty_log_protect(struct kvm *kvm,
 			struct kvm_dirty_log *log, bool *flush);
+int kvm_clear_dirty_log_protect(struct kvm *kvm,
+			struct kvm_clear_dirty_log *log, bool *flush);
 
 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 					struct kvm_memory_slot *slot,
@@ -762,6 +765,8 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
+				struct kvm_clear_dirty_log *log);
 
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 			bool line_status);
include/uapi/linux/kvm.h

@@ -492,6 +492,17 @@ struct kvm_dirty_log {
 	};
 };
 
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+	__u32 slot;
+	__u32 num_pages;
+	__u64 first_page;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding2;
+	};
+};
+
 /* for KVM_SET_SIGNAL_MASK */
 struct kvm_signal_mask {
 	__u32 len;
@@ -975,6 +986,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
 #define KVM_CAP_EXCEPTION_PAYLOAD 164
 #define KVM_CAP_ARM_VM_IPA_SIZE 165
+#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1421,6 +1433,9 @@ struct kvm_enc_region {
 #define KVM_GET_NESTED_STATE	_IOWR(KVMIO, 0xbe, struct kvm_nested_state)
 #define KVM_SET_NESTED_STATE	_IOW(KVMIO,  0xbf, struct kvm_nested_state)
 
+/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
+#define KVM_CLEAR_DIRTY_LOG	_IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
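A side note on the uapi struct: placing the user pointer in a union with a __u64 keeps the layout identical for 32-bit and 64-bit userspace (4 + 4 + 8 + 8 = 24 bytes, with no implicit padding). A compile-time check of that assumption could look like this (editorial sketch, not part of the patch):

    #include <linux/kvm.h>

    /* 4 (slot) + 4 (num_pages) + 8 (first_page) + 8 (union) = 24 bytes */
    _Static_assert(sizeof(struct kvm_clear_dirty_log) == 24,
                   "kvm_clear_dirty_log must be 24 bytes on all ABIs");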
tools/testing/selftests/kvm/Makefile

@@ -16,8 +16,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 
 TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
tools/testing/selftests/kvm/clear_dirty_log_test.c (new file)

@@ -0,0 +1,2 @@
+#define USE_CLEAR_DIRTY_LOG
+#include "dirty_log_test.c"
tools/testing/selftests/kvm/dirty_log_test.c

@@ -275,6 +275,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
 	vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code);
 
+#ifdef USE_CLEAR_DIRTY_LOG
+	struct kvm_enable_cap cap = {};
+
+	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT;
+	cap.args[0] = 1;
+	vm_enable_cap(vm, &cap);
+#endif
+
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 				    guest_test_mem,
@@ -316,6 +324,10 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		/* Give the vcpu thread some time to dirty some pages */
 		usleep(interval * 1000);
 		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+#ifdef USE_CLEAR_DIRTY_LOG
+		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+				       DIV_ROUND_UP(host_num_pages, 64) * 64);
+#endif
 		vm_dirty_log_verify(bmap);
 		iteration++;
 		sync_global_to_guest(vm, iteration);
@@ -392,6 +404,13 @@ int main(int argc, char *argv[])
 	unsigned int mode;
 	int opt, i;
 
+#ifdef USE_CLEAR_DIRTY_LOG
+	if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) {
+		fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
+		exit(KSFT_SKIP);
+	}
+#endif
+
 	while ((opt = getopt(argc, argv, "hi:I:o:tm:")) != -1) {
 		switch (opt) {
 		case 'i':
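The clear call above rounds host_num_pages up to a multiple of 64, because KVM_CLEAR_DIRTY_LOG rejects unaligned first_page/num_pages values. A quick check of that arithmetic (editorial sketch; the macro mirrors the kernel's DIV_ROUND_UP):

    #include <assert.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            /* e.g. a 300-page slot is cleared as 320 pages (5 x 64) */
            assert(DIV_ROUND_UP(300, 64) * 64 == 320);
            return 0;
    }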
tools/testing/selftests/kvm/include/kvm_util.h

@@ -58,6 +58,8 @@ void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+			    uint64_t first_page, uint32_t num_pages);
 
 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
 		       size_t len);
tools/testing/selftests/kvm/lib/kvm_util.c

@@ -231,6 +231,19 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
 		    strerror(-ret));
 }
 
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+			    uint64_t first_page, uint32_t num_pages)
+{
+	struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
+					    .first_page = first_page,
+					    .num_pages = num_pages };
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
+	TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
+		    __func__, strerror(-ret));
+}
+
 /*
  * Userspace Memory Region Find
  *
virt/kvm/arm/arm.c

@@ -1219,6 +1219,22 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return r;
 }
 
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+	bool flush = false;
+	int r;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
virt/kvm/kvm_main.c

@@ -1133,7 +1133,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 /**
  * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
- *	are dirty write protect them for next write.
+ *	and reenable dirty page tracking for the corresponding pages.
  * @kvm:	pointer to kvm instance
  * @log:	slot id and address to which we copy the log
  * @is_dirty:	flag set if any page is dirty
@@ -1176,37 +1176,114 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 		return -ENOENT;
 
 	n = kvm_dirty_bitmap_bytes(memslot);
 
-	dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
-	memset(dirty_bitmap_buffer, 0, n);
-
-	spin_lock(&kvm->mmu_lock);
 	*flush = false;
-	for (i = 0; i < n / sizeof(long); i++) {
-		unsigned long mask;
-		gfn_t offset;
-
-		if (!dirty_bitmap[i])
-			continue;
-
-		*flush = true;
-
-		mask = xchg(&dirty_bitmap[i], 0);
-		dirty_bitmap_buffer[i] = mask;
-
-		if (mask) {
-			offset = i * BITS_PER_LONG;
-			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
-								offset, mask);
+	if (kvm->manual_dirty_log_protect) {
+		/*
+		 * Unlike kvm_get_dirty_log, we always return false in *flush,
+		 * because no flush is needed until KVM_CLEAR_DIRTY_LOG.  There
+		 * is some code duplication between this function and
+		 * kvm_get_dirty_log, but hopefully all architectures
+		 * transition to kvm_get_dirty_log_protect and kvm_get_dirty_log
+		 * can be eliminated.
+		 */
+		dirty_bitmap_buffer = dirty_bitmap;
+	} else {
+		dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
+		memset(dirty_bitmap_buffer, 0, n);
+
+		spin_lock(&kvm->mmu_lock);
+		for (i = 0; i < n / sizeof(long); i++) {
+			unsigned long mask;
+			gfn_t offset;
+
+			if (!dirty_bitmap[i])
+				continue;
+
+			*flush = true;
+			mask = xchg(&dirty_bitmap[i], 0);
+			dirty_bitmap_buffer[i] = mask;
+
+			if (mask) {
+				offset = i * BITS_PER_LONG;
+				kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+									offset, mask);
+			}
 		}
+		spin_unlock(&kvm->mmu_lock);
 	}
-
-	spin_unlock(&kvm->mmu_lock);
+
 	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
 		return -EFAULT;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
 
+/**
+ * kvm_clear_dirty_log_protect - clear dirty bits in the bitmap
+ *	and reenable dirty page tracking for the corresponding pages.
+ * @kvm:	pointer to kvm instance
+ * @log:	slot id and address from which to fetch the bitmap of dirty pages
+ */
+int kvm_clear_dirty_log_protect(struct kvm *kvm,
+				struct kvm_clear_dirty_log *log, bool *flush)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int as_id, id, n;
+	gfn_t offset;
+	unsigned long i;
+	unsigned long *dirty_bitmap;
+	unsigned long *dirty_bitmap_buffer;
+
+	as_id = log->slot >> 16;
+	id = (u16)log->slot;
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+		return -EINVAL;
+
+	if ((log->first_page & 63) || (log->num_pages & 63))
+		return -EINVAL;
+
+	slots = __kvm_memslots(kvm, as_id);
+	memslot = id_to_memslot(slots, id);
+
+	dirty_bitmap = memslot->dirty_bitmap;
+	if (!dirty_bitmap)
+		return -ENOENT;
+
+	n = kvm_dirty_bitmap_bytes(memslot);
+	*flush = false;
+	dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
+	if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
+		return -EFAULT;
+
+	spin_lock(&kvm->mmu_lock);
+	for (offset = log->first_page,
+	     i = offset / BITS_PER_LONG, n = log->num_pages / BITS_PER_LONG; n--;
+	     i++, offset += BITS_PER_LONG) {
+		unsigned long mask = *dirty_bitmap_buffer++;
+		atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
+		if (!mask)
+			continue;
+
+		mask &= atomic_long_fetch_andnot(mask, p);
+
+		/*
+		 * mask contains the bits that really have been cleared.  This
+		 * never includes any bits beyond the length of the memslot (if
+		 * the length is not aligned to 64 pages), therefore it is not
+		 * a problem if userspace sets them in log->dirty_bitmap.
+		 */
+		if (mask) {
+			*flush = true;
+			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+								offset, mask);
+		}
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);
 #endif
 
 bool kvm_largepages_enabled(void)
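The subtle step in kvm_clear_dirty_log_protect() above is mask &= atomic_long_fetch_andnot(mask, p): afterwards mask holds exactly the bits that were set in KVM's dirty bitmap and have just been cleared, so only genuinely dirty pages get re-protected, and bits that userspace set spuriously are ignored. A tiny single-threaded model of that semantics (editorial sketch; the real primitive is atomic):

    #include <assert.h>

    /* Models atomic_long_fetch_andnot(mask, p): clears the mask bits
     * in *p and returns the value *p held before the update. */
    static unsigned long fetch_andnot(unsigned long mask, unsigned long *p)
    {
            unsigned long old = *p;
            *p &= ~mask;
            return old;
    }

    int main(void)
    {
            unsigned long dirty = 0xA; /* pages 1 and 3 are dirty */
            unsigned long req   = 0x6; /* userspace asks to clear pages 1 and 2 */

            unsigned long cleared = req & fetch_andnot(req, &dirty);

            assert(dirty == 0x8);   /* only page 3 stays dirty */
            assert(cleared == 0x2); /* page 1 was dirty and is re-protected */
            return 0;
    }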
@@ -2949,6 +3026,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:
 	case KVM_CAP_ENABLE_CAP_VM:
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+#endif
 		return 1;
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
@@ -2982,6 +3062,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 					   struct kvm_enable_cap *cap)
 {
 	switch (cap->cap) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT:
+		if (cap->flags || (cap->args[0] & ~1))
+			return -EINVAL;
+		kvm->manual_dirty_log_protect = cap->args[0];
+		return 0;
+#endif
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);
 	}
@@ -3029,6 +3116,17 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
 		break;
 	}
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	case KVM_CLEAR_DIRTY_LOG: {
+		struct kvm_clear_dirty_log log;
+
+		r = -EFAULT;
+		if (copy_from_user(&log, argp, sizeof(log)))
+			goto out;
+		r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+		break;
+	}
+#endif
 #ifdef CONFIG_KVM_MMIO
 	case KVM_REGISTER_COALESCED_MMIO: {
 		struct kvm_coalesced_mmio_zone zone;