[tkreuzer] 53887: [CRT] implement floor and floorf for amd64 with sse2 instructions - Ros-diffs

28 Sep 2011

Author: tkreuzer
Date: Wed Sep 28 21:32:37 2011
New Revision: 53887
URL: http://svn.reactos.org/svn/reactos?rev=53887&view=rev
Log:
[CRT]
implement floor and floorf for amd64 with sse2 instructions
Modified:
    trunk/reactos/lib/sdk/crt/libcntpr.cmake
    trunk/reactos/lib/sdk/crt/math/amd64/floor.S
    trunk/reactos/lib/sdk/crt/math/amd64/floorf.S
Modified: trunk/reactos/lib/sdk/crt/libcntpr.cmake
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/libcntpr.cmake?...
==============================================================================

--- trunk/reactos/lib/sdk/crt/libcntpr.cmake [iso-8859-1] (original)
+++ trunk/reactos/lib/sdk/crt/libcntpr.cmake [iso-8859-1] Wed Sep 28 21:32:37 2011
@@ -111,6 +111,7 @@
         math/amd64/exp.S
         math/amd64/fabs.S
         math/amd64/floor.S
+        math/amd64/floorf.S
         math/amd64/fmod.S
         math/amd64/ldexp.S
         math/amd64/log.S
Modified: trunk/reactos/lib/sdk/crt/math/amd64/floor.S
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/floo...
==============================================================================
--- trunk/reactos/lib/sdk/crt/math/amd64/floor.S [iso-8859-1] (original)
+++ trunk/reactos/lib/sdk/crt/math/amd64/floor.S [iso-8859-1] Wed Sep 28 21:32:37 2011
@@ -9,14 +9,33 @@
 /* INCLUDES ******************************************************************/
#include <asm.inc>
-#include <ksamd64.inc>
/* CODE **********************************************************************/
 .code64
PUBLIC floor
-floor:
-    UNIMPLEMENTED floor
+/*
+ * double floor(double x)
+ *
+ * In:       xmm0 = x
+ * Out:      xmm0 = largest integral value not greater than x
+ * Clobbers: rax, rcx, xmm1, flags
+ *
+ * NaN, +/-infinity, +/-0.0 and values that are already integral
+ * (including everything outside the int64 range) are returned unchanged.
+ */
+FUNC floor
+    sub rsp, 16
+    .ENDPROLOG
+
+    /* rcx = x truncated toward zero (64-bit signed integer) */
+    cvttsd2si rcx, xmm0
+
+    /* cvttsd2si yields 0x8000000000000000 ("integer indefinite") when x
+       is NaN, infinite or outside the int64 range. Every such input is
+       already integral (or NaN), so x itself is the correct result. */
+    mov rax, HEX(8000000000000000)
+    cmp rcx, rax
+    je floor_done
+
+    /* xmm1 = (double)trunc(x). rax is cleared before the compare,
+       because xor would destroy the flags that seta reads below. */
+    xor eax, eax
+    cvtsi2sd xmm1, rcx
+    ucomisd xmm1, xmm0
+
+    /* trunc(x) == x: x is already integral. Returning the input
+       untouched also preserves the sign of -0.0. */
+    je floor_done
+
+    /* Truncation rounds toward zero, so it overshoots only for negative
+       non-integral x. Subtract 1 exactly when trunc(x) > x. */
+    seta al
+    sub rcx, rax
+
+    /* Convert the result back to xmm0 (double precision) */
+    cvtsi2sd xmm0, rcx
+
+floor_done:
+    add rsp, 16
     ret
+ENDFUNC floor
END
Modified: trunk/reactos/lib/sdk/crt/math/amd64/floorf.S
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/floo...
==============================================================================
--- trunk/reactos/lib/sdk/crt/math/amd64/floorf.S [iso-8859-1] (original)
+++ trunk/reactos/lib/sdk/crt/math/amd64/floorf.S [iso-8859-1] Wed Sep 28 21:32:37 2011
@@ -1,7 +1,7 @@
 /*
  * COPYRIGHT:         See COPYING in the top level directory
  * PROJECT:           ReactOS system libraries
- * PURPOSE:           Implementation of tan
+ * PURPOSE:           Implementation of floorf
  * FILE:              lib/sdk/crt/math/amd64/floorf.S
  * PROGRAMMER:        Timo Kreuzer (timo.kreuzer@reactos.org)
  */
@@ -9,7 +9,6 @@
 /* INCLUDES ******************************************************************/
#include <asm.inc>
-#include <ksamd64.inc>
/* CODE **********************************************************************/
 .code64
@@ -19,26 +18,22 @@
     sub rsp, 16
     .ENDPROLOG
-    /* Put parameter on the stack */
-    movss dword ptr [rsp], xmm0
-    fld   dword ptr [rsp]
+    /* In: xmm0 = x (single precision). Out: xmm0 = largest integral
+       value not greater than x. Clobbers rax, rcx, xmm1 and flags. */
+
+    /* rcx = x truncated toward zero (64-bit signed integer) */
+    cvttss2si rcx, xmm0
-    /* Change fpu control word to round down */
-    fstcw [rsp]
-    mov   eax, [rsp]
-    or    eax, HEX(000400)
-    and   eax, HEX(00f7ff)
-    mov   [rsp + 8], eax
-    fldcw [rsp + 8]
+    /* cvttss2si yields 0x8000000000000000 ("integer indefinite") when x
+       is NaN, infinite or outside the int64 range. Every such input is
+       already integral (or NaN), so x itself is the correct result. */
+    mov rax, HEX(8000000000000000)
+    cmp rcx, rax
+    je floorf_done
-    /* Round to integer */
-    frndint
+    /* xmm1 = (float)trunc(x). rax is cleared before the compare,
+       because xor would destroy the flags that seta reads below. */
+    xor eax, eax
+    cvtsi2ss xmm1, rcx
+    ucomiss xmm1, xmm0
+
+    /* trunc(x) == x: x is already integral. Returning the input
+       untouched also preserves the sign of -0.0. */
+    je floorf_done
-    /* Restore fpu control word */
-    fldcw [rsp]
+    /* Truncation rounds toward zero, so it overshoots only for negative
+       non-integral x. Subtract 1 exactly when trunc(x) > x. */
+    seta al
+    sub rcx, rax
-    fstp  dword ptr [rsp]
-    movss xmm0, dword ptr [rsp]
+    /* Convert the result back to xmm0 (single precision) */
+    cvtsi2ss xmm0, rcx
+
+floorf_done:
     add rsp, 16
     ret
 ENDFUNC floorf