https://git.reactos.org/?p=reactos.git;a=commitdiff;h=abb338b13d1fce57c045f38ca759ede1da3fa2dc
commit abb338b13d1fce57c045f38ca759ede1da3fa2dc
Author: Timo Kreuzer <timo.kreuzer(a)reactos.org>
AuthorDate: Fri Mar 2 08:02:13 2018 +0100
Commit: Timo Kreuzer <timo.kreuzer(a)reactos.org>
CommitDate: Sat May 29 21:20:48 2021 +0200
[RTL/x64] Improve RtlCaptureContext
Use movaps instead of movdqa; it does the same thing but is one byte shorter.
Shuffle instructions around a bit to maximize parallel execution.
---
sdk/lib/rtl/amd64/except_asm.S | 118 ++++++++++++++++++++++-------------------
1 file changed, 63 insertions(+), 55 deletions(-)
diff --git a/sdk/lib/rtl/amd64/except_asm.S b/sdk/lib/rtl/amd64/except_asm.S
index b56cc48f51c..ea2eb7f88c8 100644
--- a/sdk/lib/rtl/amd64/except_asm.S
+++ b/sdk/lib/rtl/amd64/except_asm.S
@@ -16,9 +16,9 @@
.code64
/*
- * VOID NTAPI
+ * VOID
* RtlCaptureContext(
- * PCONTEXT ContextRecord); <rcx>
+ * _Out_ PCONTEXT ContextRecord@<rcx>);
*/
PUBLIC RtlCaptureContext
.PROC RtlCaptureContext
@@ -28,70 +28,78 @@ PUBLIC RtlCaptureContext
.ALLOCSTACK 8
.ENDPROLOG
- /* Save the basic register context */
- mov [rcx + CONTEXT_Rax], rax
- mov [rcx + CONTEXT_Rcx], rcx
- mov [rcx + CONTEXT_Rdx], rdx
+ /* Save rax first, we use it later to copy some data */
+ mov [rcx + CxRax], rax
- /* Load rflags into rax */
- mov rax, [rsp]
+ /* Set ContextFlags */
+ mov dword ptr [rcx + CxContextFlags], (CONTEXT_FULL or CONTEXT_SEGMENTS)
- mov [rcx + CONTEXT_Rbx], rbx
- mov [rcx + CONTEXT_Rsi], rsi
- mov [rcx + CONTEXT_Rdi], rdi
+ /* Store the basic register context */
+ mov [rcx + CxRcx], rcx
+ mov [rcx + CxRdx], rdx
+ mov [rcx + CxRbx], rbx
+ mov [rcx + CxRsi], rsi
- /* Store rflags */
- mov [rcx + CONTEXT_EFlags], rax
+ /* Load return address in rax */
+ mov rax, [rsp + 8]
+
+ mov [rcx + CxRdi], rdi
+ mov [rcx + CxRbp], rbp
+ mov [rcx + CxR8], r8
+ mov [rcx + CxR9], r9
+ mov [rcx + CxR10], r10
+
+ /* Store the return address */
+ mov [rcx + CxRip], rax
- mov [rcx + CONTEXT_Rbp], rbp
- mov [rcx + CONTEXT_R8], r8
- mov [rcx + CONTEXT_R9], r9
+ mov [rcx + CxR11], r11
+ mov [rcx + CxR12], r12
+ mov [rcx + CxR13], r13
+ mov [rcx + CxR14], r14
+ mov [rcx + CxR15], r15
/* Load former stack pointer in rax */
lea rax, [rsp + 16]
- mov [rcx + CONTEXT_R10], r10
- mov [rcx + CONTEXT_R11], r11
- mov [rcx + CONTEXT_R12], r12
+ /* Store segment selectors */
+ mov [rcx + CxSegCs], cs
+ mov [rcx + CxSegDs], ds
+ mov [rcx + CxSegEs], es
+ mov [rcx + CxSegFs], fs
+ mov [rcx + CxSegGs], gs
+ mov [rcx + CxSegSs], ss
/* Store stack pointer */
- mov [rcx + CONTEXT_Rsp], rax
+ mov [rcx + CxRsp], rax
+
+ /* Store xmm registers */
+ movaps [rcx + CxXmm0], xmm0
+ movaps [rcx + CxXmm1], xmm1
+ movaps [rcx + CxXmm2], xmm2
+ movaps [rcx + CxXmm3], xmm3
+ movaps [rcx + CxXmm4], xmm4
+ movaps [rcx + CxXmm5], xmm5
+ movaps [rcx + CxXmm6], xmm6
+ movaps [rcx + CxXmm7], xmm7
+
+ /* Load rflags into eax */
+ mov eax, [rsp]
+
+ movaps [rcx + CxXmm8], xmm8
+ movaps [rcx + CxXmm9], xmm9
+ movaps [rcx + CxXmm10], xmm10
+ movaps [rcx + CxXmm11], xmm11
+ movaps [rcx + CxXmm12], xmm12
+ movaps [rcx + CxXmm13], xmm13
+ movaps [rcx + CxXmm14], xmm14
+ movaps [rcx + CxXmm15], xmm15
+
+ /* Store legacy floating point registers */
+ fxsave [rcx + CxFltSave]
+ stmxcsr [rcx + CxMxCsr]
- mov [rcx + CONTEXT_R13], r13
- mov [rcx + CONTEXT_R14], r14
- mov [rcx + CONTEXT_R15], r15
-
- /* Load return address in rax */
- mov rax, [rsp + 8]
-
- /* Safe segment selectors */
- mov [rcx + CONTEXT_SegCs], cs
- mov [rcx + CONTEXT_SegDs], ds
- mov [rcx + CONTEXT_SegEs], es
- mov [rcx + CONTEXT_SegFs], fs
- mov [rcx + CONTEXT_SegGs], gs
- mov [rcx + CONTEXT_SegSs], ss
-
- /* Store return address */
- mov [rcx + CONTEXT_Rip], rax
-
- /* Safe xmm registers */
- movdqa [rcx + CONTEXT_Xmm0], xmm0
- movdqa [rcx + CONTEXT_Xmm1], xmm1
- movdqa [rcx + CONTEXT_Xmm2], xmm2
- movdqa [rcx + CONTEXT_Xmm3], xmm3
- movdqa [rcx + CONTEXT_Xmm4], xmm4
- movdqa [rcx + CONTEXT_Xmm5], xmm5
- movdqa [rcx + CONTEXT_Xmm6], xmm6
- movdqa [rcx + CONTEXT_Xmm7], xmm7
- movdqa [rcx + CONTEXT_Xmm8], xmm8
- movdqa [rcx + CONTEXT_Xmm9], xmm9
- movdqa [rcx + CONTEXT_Xmm10], xmm10
- movdqa [rcx + CONTEXT_Xmm11], xmm11
- movdqa [rcx + CONTEXT_Xmm12], xmm12
- movdqa [rcx + CONTEXT_Xmm13], xmm13
- movdqa [rcx + CONTEXT_Xmm14], xmm14
- movdqa [rcx + CONTEXT_Xmm15], xmm15
+ /* Store rflags */
+ mov [rcx + CxEFlags], eax
/* Cleanup stack and return */
add rsp, 8