https://git.reactos.org/?p=reactos.git;a=commitdiff;h=064a35dc6717962a7b197…
commit 064a35dc6717962a7b1973661566d53f6da06155
Author: George Bișoc <george.bisoc(a)reactos.org>
AuthorDate: Tue Jan 25 13:19:06 2022 +0100
Commit: George Bișoc <george.bisoc(a)reactos.org>
CommitDate: Tue May 24 18:39:45 2022 +0200
[NTOS:KE] Fully implement FPU Save/Restore mechanism
This commit fully implements the inner logic of KeSaveFloatingPointState and
KeRestoreFloatingPointState routines. On ReactOS we're currently simply doing a FNSAVE
operation whereas on Windows it is a lot more than that.
On Windows Server 2003 the logic more or less goes like this. In order to save the FPU
state the NPX state of the current thread has to be checked first, that is, if NPX is
loaded and currently charged for the current thread then the system will acquire the NPX
registers actively present. From that point it performs either a FNSAVE or FXSAVE
if FX is actually supported. Otherwise the control word and MXCsr registers are
obtained.
FXSAVE/FNSAVE operation is done solely if the FX save area is held up in a pool
allocation. Pool allocation occurs if it's been found out that the NPX IRQL of the
thread is not the same as the current thread which from where it determines if the
interrupt level is APC then allocate some pool memory and hold the save area there,
otherwise
the save area in question is grabbed from the current processor control region. If NPX
is not loaded for the current thread then the FPU state is obtained from the NPX frame.
In our case we'll be doing something way simpler. Only do a FXSAVE/FNSAVE directly
of the FPU state registers, in this way we are simplifying the code and the actual logic
of Save/Restore mechanism.
---
ntoskrnl/ke/i386/cpu.c | 245 ++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 210 insertions(+), 35 deletions(-)
diff --git a/ntoskrnl/ke/i386/cpu.c b/ntoskrnl/ke/i386/cpu.c
index 0e4b8fa8468..eac0bf42680 100644
--- a/ntoskrnl/ke/i386/cpu.c
+++ b/ntoskrnl/ke/i386/cpu.c
@@ -12,6 +12,8 @@
#define NDEBUG
#include <debug.h>
+#include <xmmintrin.h>
+
/* GLOBALS *******************************************************************/
/* The TSS to use for Double Fault Traps (INT 0x9) */
@@ -75,6 +77,9 @@ typedef union _CPU_SIGNATURE
ULONG AsULONG;
} CPU_SIGNATURE;
+/* FX area alignment size */
+#define FXSAVE_ALIGN 15
+
/* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/
/* NSC/Cyrix CPU configuration register index */
@@ -97,7 +102,6 @@ setCx86(UCHAR reg, UCHAR data)
WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
}
-
/* FUNCTIONS *****************************************************************/
CODE_SEG("INIT")
@@ -1306,59 +1310,230 @@ KiCoprocessorError(VOID)
__writecr0(__readcr0() | CR0_TS);
}
-/*
- * @implemented
+/**
+ * @brief
+ * Saves the current floating point unit state
+ * context of the current calling thread.
+ *
+ * @param[out] Save
+ * The saved floating point context given to the
+ * caller at the end of function's operations.
+ * The structure whose data contents are opaque
+ * to the calling thread.
+ *
+ * @return
+ * Returns STATUS_SUCCESS if the function has
+ * successfully completed its operations.
+ * STATUS_INSUFFICIENT_RESOURCES is returned
+ * if the function couldn't allocate memory
+ * for FPU state information.
+ *
+ * @remarks
+ * The function performs a FPU state save
+ * in two ways. A normal FPU save (FNSAVE)
+ * is performed if the system doesn't have
+ * SSE/SSE2, otherwise the function performs
+ * a save of FPU, MMX and SSE states save (FXSAVE).
*/
+#if defined(__clang__)
+__attribute__((__target__("sse")))
+#endif
NTSTATUS
NTAPI
-KeSaveFloatingPointState(OUT PKFLOATING_SAVE Save)
+KeSaveFloatingPointState(
+ _Out_ PKFLOATING_SAVE Save)
{
- PFNSAVE_FORMAT FpState;
+ PFLOATING_SAVE_CONTEXT FsContext;
+ PFX_SAVE_AREA FxSaveAreaFrame;
+ PKPRCB CurrentPrcb;
+
+ /* Sanity checks */
+ ASSERT(Save);
ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
- UNIMPLEMENTED_ONCE;
+ ASSERT(KeI386NpxPresent);
- FpState = ExAllocatePool(NonPagedPool, sizeof (FNSAVE_FORMAT));
- if (!FpState) return STATUS_INSUFFICIENT_RESOURCES;
+ /* Initialize the floating point context */
+ FsContext = ExAllocatePoolWithTag(NonPagedPool,
+ sizeof(FLOATING_SAVE_CONTEXT),
+ TAG_FLOATING_POINT_CONTEXT);
+ if (!FsContext)
+ {
+ /* Bail out if we failed */
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
- *((PVOID *) Save) = FpState;
-#ifdef __GNUC__
- asm volatile("fnsave %0\n\t" : "=m" (*FpState));
-#else
- __asm
+ /*
+ * Allocate some memory pool for the buffer. The size
+ * of this allocated buffer is the FX area plus the
+ * alignment requirement needed for FXSAVE as a 16-byte
+ * aligned pointer is compulsory in order to save the
+ * FPU state.
+ */
+ FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
+ sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
+ TAG_FLOATING_POINT_FX);
+ if (!FsContext->Buffer)
{
- mov eax, [FpState]
- fnsave [eax]
- };
-#endif
+ /* Bail out if we failed */
+ ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ /*
+ * Now cache the allocated buffer into the save area
+ * and align the said area to a 16-byte boundary. Why
+ * do we have to do this is because of ExAllocate function.
+ * We gave the necessary alignment requirement in the pool
+ * allocation size although the function will always return
+ * a 8-byte aligned pointer. Aligning the given pointer directly
+ * can cause issues when freeing it from memory afterwards. With
+ * that said, we have to cache the buffer to the area so that we
+ * do not touch or mess the allocated buffer any further.
+ */
+ FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);
+
+ /* Disable interrupts and get the current processor control region */
+ _disable();
+ CurrentPrcb = KeGetCurrentPrcb();
+
+ /* Store the current thread to context */
+ FsContext->CurrentThread = KeGetCurrentThread();
+
+ /*
+ * Save the previous NPX thread state registers (aka Numeric
+ * Processor eXtension) into the current context so that
+ * we are informing the scheduler the current FPU state
+ * belongs to this thread.
+ */
+ if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
+ {
+ if ((CurrentPrcb->NpxThread != NULL) &&
+ (CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
+ {
+ /* Get the FX frame */
+ FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);
+
+ /* Save the FPU state */
+ Ke386SaveFpuState(FxSaveAreaFrame);
+
+ /* NPX thread has lost its state */
+ CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
+ FxSaveAreaFrame->NpxSavedCpu = 0;
+ }
+
+ /* The new NPX thread is the current thread */
+ CurrentPrcb->NpxThread = FsContext->CurrentThread;
+ }
+
+ /* Perform the save */
+ Ke386SaveFpuState(FsContext->PfxSaveArea);
- KeGetCurrentThread()->Header.NpxIrql = KeGetCurrentIrql();
+ /* Store the NPX IRQL */
+ FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;
+
+ /* Set the current IRQL to NPX */
+ FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();
+
+ /* Initialize the FPU */
+ Ke386FnInit();
+
+ /* Enable interrupts back */
+ _enable();
+
+ /* Give the saved FPU context to the caller */
+ *((PVOID *) Save) = FsContext;
return STATUS_SUCCESS;
}
-/*
- * @implemented
+/**
+ * @brief
+ * Restores the original FPU state context that has
+ * been saved by a API call of KeSaveFloatingPointState.
+ * Callers are expected to restore the floating point
+ * state by calling this function when they've finished
+ * doing FPU operations.
+ *
+ * @param[in] Save
+ * The saved floating point context that is to be given
+ * to the function to restore the FPU state.
+ *
+ * @return
+ * Returns STATUS_SUCCESS indicating the function
+ * has fully completed its operations.
*/
+#if defined(__clang__)
+__attribute__((__target__("sse")))
+#endif
NTSTATUS
NTAPI
-KeRestoreFloatingPointState(IN PKFLOATING_SAVE Save)
+KeRestoreFloatingPointState(
+ _In_ PKFLOATING_SAVE Save)
{
- PFNSAVE_FORMAT FpState = *((PVOID *) Save);
- ASSERT(KeGetCurrentThread()->Header.NpxIrql == KeGetCurrentIrql());
- UNIMPLEMENTED_ONCE;
+ PFLOATING_SAVE_CONTEXT FsContext;
-#ifdef __GNUC__
- asm volatile("fnclex\n\t");
- asm volatile("frstor %0\n\t" : "=m" (*FpState));
-#else
- __asm
+ /* Sanity checks */
+ ASSERT(Save);
+ ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
+ ASSERT(KeI386NpxPresent);
+
+ /* Cache the saved FS context */
+ FsContext = *((PVOID *) Save);
+
+ /*
+ * We have to restore the regular saved FPU
+ * state. For this we must first do some
+ * validation checks so that we are sure
+ * ourselves the state context is saved
+ * properly. Check if we are in the same
+ * calling thread.
+ */
+ if (FsContext->CurrentThread != KeGetCurrentThread())
{
- fnclex
- mov eax, [FpState]
- frstor [eax]
- };
-#endif
+ /*
+ * This isn't the thread that saved the
+ * FPU state context, crash the system!
+ */
+ KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
+ 0x2,
+ (ULONG_PTR)FsContext->CurrentThread,
+ (ULONG_PTR)KeGetCurrentThread(),
+ 0);
+ }
+
+ /* Are we under the same NPX interrupt level? */
+ if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
+ {
+ /* The interrupt level has changed, crash the system! */
+ KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
+ 0x1,
+ (ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
+ (ULONG_PTR)KeGetCurrentIrql(),
+ 0);
+ }
+
+ /* Disable interrupts */
+ _disable();
+
+ /*
+ * The saved FPU state context is valid,
+ * it's time to restore the state. First,
+ * clear FPU exceptions now.
+ */
+ Ke386ClearFpExceptions();
+
+ /* Restore the state */
+ Ke386RestoreFpuState(FsContext->PfxSaveArea);
+
+ /* Give the saved NPX IRQL back to the NPX thread */
+ FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;
+
+ /* Enable interrupts back */
+ _enable();
+
+ /* We're done, free the allocated area and context */
+ ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
+ ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
- ExFreePool(FpState);
return STATUS_SUCCESS;
}