https://git.reactos.org/?p=reactos.git;a=commitdiff;h=abb338b13d1fce57c045f38ca759ede1da3fa2dc
commit abb338b13d1fce57c045f38ca759ede1da3fa2dc
Author:     Timo Kreuzer <timo.kreuzer@reactos.org>
AuthorDate: Fri Mar 2 08:02:13 2018 +0100
Commit:     Timo Kreuzer <timo.kreuzer@reactos.org>
CommitDate: Sat May 29 21:20:48 2021 +0200
[RTL/x64] Improve RtlCaptureContext
Use movaps instead of movdqa; it does the same thing but is one byte shorter.
Shuffle instructions around a bit to maximize parallel execution.
---
 sdk/lib/rtl/amd64/except_asm.S | 118 ++++++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 55 deletions(-)
diff --git a/sdk/lib/rtl/amd64/except_asm.S b/sdk/lib/rtl/amd64/except_asm.S index b56cc48f51c..ea2eb7f88c8 100644 --- a/sdk/lib/rtl/amd64/except_asm.S +++ b/sdk/lib/rtl/amd64/except_asm.S @@ -16,9 +16,9 @@ .code64
/* - * VOID NTAPI + * VOID * RtlCaptureContext( - * PCONTEXT ContextRecord); <rcx> + * _Out_ PCONTEXT ContextRecord@<rcx>); */ PUBLIC RtlCaptureContext .PROC RtlCaptureContext @@ -28,70 +28,78 @@ PUBLIC RtlCaptureContext .ALLOCSTACK 8 .ENDPROLOG
- /* Save the basic register context */ - mov [rcx + CONTEXT_Rax], rax - mov [rcx + CONTEXT_Rcx], rcx - mov [rcx + CONTEXT_Rdx], rdx + /* Save rax first, we use it later to copy some data */ + mov [rcx + CxRax], rax
- /* Load rflags into rax */ - mov rax, [rsp] + /* Set ContextFlags */ + mov dword ptr [rcx + CxContextFlags], (CONTEXT_FULL or CONTEXT_SEGMENTS)
- mov [rcx + CONTEXT_Rbx], rbx - mov [rcx + CONTEXT_Rsi], rsi - mov [rcx + CONTEXT_Rdi], rdi + /* Store the basic register context */ + mov [rcx + CxRcx], rcx + mov [rcx + CxRdx], rdx + mov [rcx + CxRbx], rbx + mov [rcx + CxRsi], rsi
- /* Store rflags */ - mov [rcx + CONTEXT_EFlags], rax + /* Load return address in rax */ + mov rax, [rsp + 8] + + mov [rcx + CxRdi], rdi + mov [rcx + CxRbp], rbp + mov [rcx + CxR8], r8 + mov [rcx + CxR9], r9 + mov [rcx + CxR10], r10 + + /* Store the return address */ + mov [rcx + CxRip], rax
- mov [rcx + CONTEXT_Rbp], rbp - mov [rcx + CONTEXT_R8], r8 - mov [rcx + CONTEXT_R9], r9 + mov [rcx + CxR11], r11 + mov [rcx + CxR12], r12 + mov [rcx + CxR13], r13 + mov [rcx + CxR14], r14 + mov [rcx + CxR15], r15
/* Load former stack pointer in rax */ lea rax, [rsp + 16]
- mov [rcx + CONTEXT_R10], r10 - mov [rcx + CONTEXT_R11], r11 - mov [rcx + CONTEXT_R12], r12 + /* Store segment selectors */ + mov [rcx + CxSegCs], cs + mov [rcx + CxSegDs], ds + mov [rcx + CxSegEs], es + mov [rcx + CxSegFs], fs + mov [rcx + CxSegGs], gs + mov [rcx + CxSegSs], ss
/* Store stack pointer */ - mov [rcx + CONTEXT_Rsp], rax + mov [rcx + CxRsp], rax + + /* Store xmm registers */ + movaps [rcx + CxXmm0], xmm0 + movaps [rcx + CxXmm1], xmm1 + movaps [rcx + CxXmm2], xmm2 + movaps [rcx + CxXmm3], xmm3 + movaps [rcx + CxXmm4], xmm4 + movaps [rcx + CxXmm5], xmm5 + movaps [rcx + CxXmm6], xmm6 + movaps [rcx + CxXmm7], xmm7 + + /* Load rflags into eax */ + mov eax, [rsp] + + movaps [rcx + CxXmm8], xmm8 + movaps [rcx + CxXmm9], xmm9 + movaps [rcx + CxXmm10], xmm10 + movaps [rcx + CxXmm11], xmm11 + movaps [rcx + CxXmm12], xmm12 + movaps [rcx + CxXmm13], xmm13 + movaps [rcx + CxXmm14], xmm14 + movaps [rcx + CxXmm15], xmm15 + + /* Store legacy floating point registers */ + fxsave [rcx + CxFltSave] + stmxcsr [rcx + CxMxCsr]
- mov [rcx + CONTEXT_R13], r13 - mov [rcx + CONTEXT_R14], r14 - mov [rcx + CONTEXT_R15], r15 - - /* Load return address in rax */ - mov rax, [rsp + 8] - - /* Safe segment selectors */ - mov [rcx + CONTEXT_SegCs], cs - mov [rcx + CONTEXT_SegDs], ds - mov [rcx + CONTEXT_SegEs], es - mov [rcx + CONTEXT_SegFs], fs - mov [rcx + CONTEXT_SegGs], gs - mov [rcx + CONTEXT_SegSs], ss - - /* Store return address */ - mov [rcx + CONTEXT_Rip], rax - - /* Safe xmm registers */ - movdqa [rcx + CONTEXT_Xmm0], xmm0 - movdqa [rcx + CONTEXT_Xmm1], xmm1 - movdqa [rcx + CONTEXT_Xmm2], xmm2 - movdqa [rcx + CONTEXT_Xmm3], xmm3 - movdqa [rcx + CONTEXT_Xmm4], xmm4 - movdqa [rcx + CONTEXT_Xmm5], xmm5 - movdqa [rcx + CONTEXT_Xmm6], xmm6 - movdqa [rcx + CONTEXT_Xmm7], xmm7 - movdqa [rcx + CONTEXT_Xmm8], xmm8 - movdqa [rcx + CONTEXT_Xmm9], xmm9 - movdqa [rcx + CONTEXT_Xmm10], xmm10 - movdqa [rcx + CONTEXT_Xmm11], xmm11 - movdqa [rcx + CONTEXT_Xmm12], xmm12 - movdqa [rcx + CONTEXT_Xmm13], xmm13 - movdqa [rcx + CONTEXT_Xmm14], xmm14 - movdqa [rcx + CONTEXT_Xmm15], xmm15 + /* Store rflags */ + mov [rcx + CxEFlags], eax
/* Cleanup stack and return */ add rsp, 8