KVM: VMX: Explicitly initialize controls shadow at VMCS allocation

This change “KVM: VMX: Explicitly initialize controls shadow at VMCS allocation” (commit 3af80fe) in Linux kernel is authored by Sean Christopherson <sean.j.christopherson [at] intel.com> on Tue May 7 12:18:00 2019 -0700.

Description of "KVM: VMX: Explicitly initialize controls shadow at VMCS allocation"

The change “KVM: VMX: Explicitly initialize controls shadow at VMCS allocation” introduces changes as follows.

KVM: VMX: Explicitly initialize controls shadow at VMCS allocation

Or: Don't re-initialize vmcs02's controls on every nested VM-Entry.

VMWRITEs to the major VMCS controls are deceptively expensive.  Intel
CPUs with VMCS caching (Westmere and later) also optimize away
consistency checks on VM-Entry, i.e. skip consistency checks if the
relevant fields have not changed since the last successful VM-Entry (of
the cached VMCS).  Because uops are a precious commodity, uCode's dirty
VMCS field tracking isn't as precise as software would prefer.  Notably,
writing any of the major VMCS fields effectively marks the entire VMCS
dirty, i.e. causes the next VM-Entry to perform all consistency checks,
which consumes several hundred cycles.

Zero out the controls' shadow copies during VMCS allocation and use the
optimized setter when "initializing" controls.  While this technically
affects both non-nested and nested virtualization, nested virtualization
is the primary beneficiary as avoid VMWRITEs when prepare vmcs02 allows
hardware to optimizie away consistency checks.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Linux kernel releases containing commit 3af80fe

The Linux kernel releases containing this commit are as follows.

Linux kernel code changes from "KVM: VMX: Explicitly initialize controls shadow at VMCS allocation"

There are 27 lines of Linux source code added/deleted in this change. Code changes to Linux kernel are as follows.

 arch/x86/kvm/vmx/nested.c | 10 +++++-----
 arch/x86/kvm/vmx/vmx.c    | 12 +++++++-----
 arch/x86/kvm/vmx/vmx.h    |  5 -----
 3 files changed, 12 insertions(+), 15 deletions(-)
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d4f529a2e194..3f76a1f3fe3c 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2024,7 +2024,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	} else {
 		exec_control &= ~PIN_BASED_POSTED_INTR;
 	}
-	pin_controls_init(vmx, exec_control);
+	pin_controls_set(vmx, exec_control);
 
 	/*
 	 * EXEC CONTROLS
@@ -2049,7 +2049,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	 */
 	exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
 	exec_control |= CPU_BASED_UNCOND_IO_EXITING;
-	exec_controls_init(vmx, exec_control);
+	exec_controls_set(vmx, exec_control);
 
 	/*
 	 * SECONDARY EXEC CONTROLS
@@ -2079,7 +2079,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 			vmcs_write16(GUEST_INTR_STATUS,
 				vmcs12->guest_intr_status);
 
-		secondary_exec_controls_init(vmx, exec_control);
+		secondary_exec_controls_set(vmx, exec_control);
 	}
 
 	/*
@@ -2098,7 +2098,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 		if (guest_efer != host_efer)
 			exec_control |= VM_ENTRY_LOAD_IA32_EFER;
 	}
-	vm_entry_controls_init(vmx, exec_control);
+	vm_entry_controls_set(vmx, exec_control);
 
 	/*
 	 * EXIT CONTROLS
@@ -2110,7 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	exec_control = vmx_vmexit_ctrl();
 	if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
 		exec_control |= VM_EXIT_LOAD_IA32_EFER;
-	vm_exit_controls_init(vmx, exec_control);
+	vm_exit_controls_set(vmx, exec_control);
 
 	/*
 	 * Interrupt/Exception Fields
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8c1cbc19af97..bae376bf9c20 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2485,6 +2485,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	}
 
 	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+	memset(&loaded_vmcs->controls_shadow, 0,
+		sizeof(struct vmcs_controls_shadow));
 
 	return 0;
 
@@ -4040,14 +4042,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
-	pin_controls_init(vmx, vmx_pin_based_exec_ctrl(vmx));
+	pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
 	vmx->hv_deadline_tsc = -1;
 
-	exec_controls_init(vmx, vmx_exec_control(vmx));
+	exec_controls_set(vmx, vmx_exec_control(vmx));
 
 	if (cpu_has_secondary_exec_ctrls()) {
 		vmx_compute_secondary_exec_control(vmx);
-		secondary_exec_controls_init(vmx, vmx->secondary_exec_control);
+		secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
 	}
 
 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
@@ -4105,10 +4107,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
+	vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
 
 	/* 22.2.1, 20.8.1 */
-	vm_entry_controls_init(vmx, vmx_vmentry_ctrl());
+	vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
 
 	vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 52d7bc90d9ef..82d0bc3a4d52 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -388,11 +388,6 @@ static inline u8 vmx_get_rvi(void)
 }
 
 #define BUILD_CONTROLS_SHADOW(lname, uname)				    \
-static inline void lname##_controls_init(struct vcpu_vmx *vmx, u32 val)	    \
-{									    \
-	vmcs_write32(uname, val);					    \
-	vmx->loaded_vmcs->controls_shadow.lname = val;			    \
-}									    \
 static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val)	    \
 {									    \
 	if (vmx->loaded_vmcs->controls_shadow.lname != val) {		    \

The commit for this change in Linux stable tree is 3af80fe (patch).

Last modified: 2020/02/08