Author: tkreuzer
Date: Mon Jan 28 23:44:29 2013
New Revision: 58251
URL: http://svn.reactos.org/svn/reactos?rev=58251&view=rev
Log:
[CRT]
- Remove x64 asm stub for acos from cmake file, since we already have a generic C implementation
- Implement sqrt for amd64 in SSE, both in C and asm. While the C version would be sufficient, it's currently less portable due to the lack of mm intrinsics for GCC
- Silence a warning
Added:
trunk/reactos/lib/sdk/crt/math/amd64/asin.c (with props)
trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c (with props)
Modified:
trunk/reactos/lib/sdk/crt/crt.cmake
trunk/reactos/lib/sdk/crt/locale/locale.c
trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S
Modified: trunk/reactos/lib/sdk/crt/crt.cmake
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/crt.cmake?rev=…
==============================================================================
--- trunk/reactos/lib/sdk/crt/crt.cmake [iso-8859-1] (original)
+++ trunk/reactos/lib/sdk/crt/crt.cmake [iso-8859-1] Mon Jan 28 23:44:29 2013
@@ -433,7 +433,7 @@
float/amd64/getsetfpcw.S
float/amd64/fpreset.S
float/amd64/logb.S
- math/amd64/acos.S
+ # math/amd64/acos.S
math/amd64/acosf.S
math/amd64/atan.S
math/amd64/atan2.S
Modified: trunk/reactos/lib/sdk/crt/locale/locale.c
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/locale/locale.…
==============================================================================
--- trunk/reactos/lib/sdk/crt/locale/locale.c [iso-8859-1] (original)
+++ trunk/reactos/lib/sdk/crt/locale/locale.c [iso-8859-1] Mon Jan 28 23:44:29 2013
@@ -1402,6 +1402,7 @@
if(category == LC_ALL)
return construct_lc_all(locinfo);
+ _Analysis_assume_(category <= 5);
return locinfo->lc_category[category].locale;
}
@@ -1481,13 +1482,13 @@
void __init_global_locale()
{
unsigned i;
-
+
LOCK_LOCALE;
/* Someone created it before us */
if(global_locale)
return;
global_locale = MSVCRT__create_locale(0, "C");
-
+
__lc_codepage = MSVCRT_locale->locinfo->lc_codepage;
MSVCRT___lc_collate_cp = MSVCRT_locale->locinfo->lc_collate_cp;
__mb_cur_max = MSVCRT_locale->locinfo->mb_cur_max;
Added: trunk/reactos/lib/sdk/crt/math/amd64/asin.c
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/asi…
==============================================================================
--- trunk/reactos/lib/sdk/crt/math/amd64/asin.c (added)
+++ trunk/reactos/lib/sdk/crt/math/amd64/asin.c [iso-8859-1] Mon Jan 28 23:44:29 2013
@@ -1,0 +1,73 @@
+/*
+ * COPYRIGHT: See COPYING in the top level directory
+ * PROJECT: ReactOS CRT
+ * FILE: lib/sdk/crt/math/amd64/asin.c
+ * PURPOSE: Generic C implementation of arc sine
+ * PROGRAMMER: Timo Kreuzer (timo.kreuzer(a)reactos.org)
+ */
+/* NOTE(review): PRECISION is not referenced anywhere in this file; asin() below is unrolled to nine terms (a0..a8), presumably what this value records - confirm. */
+#define PRECISION 9
+
+/*
+ * The arc sine can be approximated with the following power series:
+ *
+ * asin(x) = a0*x + a1*x^3 + a2*x^5 + a3*x^7 + a4*x^9 + ...
+ *
+ * To reduce the number of multiplications the formula is transformed to
+ *
+ * asin(x) = x * (1 + x^2*(a1 + x^2*(a2 + x^2*(a3 + ...) ) ) )
+ *
+ * The coefficients are:
+ * a0 = 1
+ * a1 = 1/(2*3)
+ * a2 = (3*1)/(4*2*5)
+ * a3 = (5*3*1)/(6*4*2*7)
+ * a4 = (7*5*3*1)/(8*6*4*2*9)
+ * a5 = (9*7*5*3*1)/(10*8*6*4*2*11)
+ * ...
+ */
+/* Approximate asin(x) with an odd power series in x, truncated after the x^17 term. */
+double
+asin(double x)
+{
+ double x2, result;
+ /* NOTE(review): no definition of NaN is visible in this file - confirm where it comes from. */
+ /* Arguments outside [-1, 1] are rejected: return NaN for them */
+ if ((x > 1.) || (x < -1.)) return NaN;
+
+ /* The series is evaluated in powers of x^2, so compute the square once */
+ x2 = (x * x);
+
+ /* Start with 0, compiler will optimize this away */
+ result = 0;
+ /* Horner's scheme, innermost term first: a8, the coefficient of x^17 */
+ result += (15*13*11*9*7*5*3*1./(16*14*12*10*8*6*4*2*17));
+ result *= x2;
+ /* a7, the coefficient of x^15 */
+ result += (13*11*9*7*5*3*1./(14*12*10*8*6*4*2*15));
+ result *= x2;
+ /* a6, the coefficient of x^13 */
+ result += (11*9*7*5*3*1./(12*10*8*6*4*2*13));
+ result *= x2;
+ /* a5, the coefficient of x^11 */
+ result += (9*7*5*3*1./(10*8*6*4*2*11));
+ result *= x2;
+ /* a4, the coefficient of x^9 */
+ result += (7*5*3*1./(8*6*4*2*9));
+ result *= x2;
+ /* a3, the coefficient of x^7 */
+ result += (5*3*1./(6*4*2*7));
+ result *= x2;
+ /* a2, the coefficient of x^5 */
+ result += (3*1./(4*2*5));
+ result *= x2;
+ /* a1, the coefficient of x^3 */
+ result += (1./(2*3));
+ result *= x2;
+ /* a0 = 1; the final multiplication by x turns the even powers into odd ones */
+ result += 1.;
+ result *= x;
+ /* NOTE(review): only nine terms are summed and the series converges slowly as |x| approaches 1 - confirm the accuracy there is acceptable. */
+ return result;
+}
+
Propchange: trunk/reactos/lib/sdk/crt/math/amd64/asin.c
------------------------------------------------------------------------------
svn:eol-style = native
Modified: trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/sqr…
==============================================================================
--- trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S [iso-8859-1] (original)
+++ trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S [iso-8859-1] Mon Jan 28 23:44:29 2013
@@ -9,14 +9,57 @@
/* INCLUDES ******************************************************************/
#include <asm.inc>
-#include <ksamd64.inc>
/* CODE **********************************************************************/
.code64
PUBLIC sqrt
sqrt:
- UNIMPLEMENTED sqrt
+ /* double sqrt(double x): x is read from the low 64 bits of xmm0 and the result is returned in xmm0 */
+ /* Load a mask with only bit 63 (the sign bit of a double) set into rdx */
+ mov rdx, HEX(8000000000000000)
+
+ /* Copy the raw bit pattern of x (the lower 64 bits of xmm0) into rax */
+ movd rax, xmm0
+
+ /* Test the sign bit */
+ test rax, rdx
+
+ /* If it is set (x is negative or -0.0), go to the failure path */
+ jnz x_is_negative
+ /* NOTE(review): the constant below uses an 'h' suffix while every other constant here uses HEX() - confirm this is intended */
+ /* x is positive, now check if it is NaN by checking if the unsigned
+ integer value is larger than the bit pattern of +infinity (7FF0000000000000). */
+ mov rcx, 7FF0000000000000h
+ cmp rax, rcx
+ ja short x_is_nan
+ /* NOTE(review): SQRTPD also processes the upper double of xmm0, which this routine never sets */
+ /* All is well, calculate the sqrt */
+ sqrtpd xmm0, xmm0
ret
+x_is_negative:
+ /* Load failure return value (-1.#IND00) into rcx */
+ mov rcx, HEX(0FFF8000000000000)
+
+ /* rdx still holds the sign mask: equality means the parameter was exactly -0.0 */
+ cmp rax, rdx
+
+ /* If it was not, load the failure value, otherwise keep -0.0 */
+ cmovne rax, rcx
+
+ /* Move the value back into the return register */
+ movd xmm0, rax
+ ret
+
+x_is_nan:
+ /* Create a 1.#QNAN0 by setting bit 51, the most significant mantissa bit */
+ mov rcx, HEX(8000000000000)
+ or rax, rcx
+
+ /* Move the value back into the return register */
+ movd xmm0, rax
+ ret
+
+
END
Added: trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/sqr…
==============================================================================
--- trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c (added)
+++ trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c [iso-8859-1] Mon Jan 28 23:44:29 2013
@@ -1,0 +1,77 @@
+
+#include <intrin.h>
+/* Square root of a double via SSE2 intrinsics; negative and NaN inputs are handled on the raw bit pattern first. */
+double
+sqrt (
+ double x)
+{
+ register union
+ {
+ __m128d x128d;
+ __m128i x128i;
+ } u ; /* 128-bit value, viewed either as packed doubles or as integer data */
+ register union
+ {
+ unsigned long long ullx;
+ double dbl;
+ } u2; /* 64-bit value, viewed either as raw bits or as a double */
+
+ /* Set the lower double-precision value of u to x.
+ All that we want is that the compiler understands that we have the
+ function parameter in a register that we can address as an __m128.
+ Sadly there is no obvious way to do that. If we use the union, VS will
+ generate code to store xmm0 in memory and then read it into a GPR.
+ We avoid memory access by using a direct move. But even here we won't
+ get a simple MOVSD. We can either do:
+ a) _mm_set_sd: move x into the lower part of an xmm register and zero
+ out the upper part (XORPD+MOVSD)
+ b) _mm_set1_pd: move x into the lower and higher part of an xmm register
+ (MOVSD+UNPCKLPD)
+ c) _mm_set_pd, which either generates a memory access, when we try to
+ tell it to keep the upper 64 bits, or generates 2 MOVAPS + UNPCKLPD
+ We choose a), which is probably the fastest.
+ */
+ u.x128d = _mm_set_sd(x);
+
+ /* Move the contents of the lower 64 bit into a 64 bit GPR using MOVD */
+ u2.ullx = _mm_cvtsi128_si64(u.x128i);
+
+ /* Check for negative values (bit 63, the sign bit, is set) */
+ if (u2.ullx & 0x8000000000000000ULL)
+ {
+ /* Check if this is *really* negative and not just -0.0 */
+ if (u2.ullx != 0x8000000000000000ULL)
+ {
+ /* Return -1.#IND00 */
+ u2.ullx = 0xfff8000000000000ULL;
+ }
+
+ /* Return what we have: -0.0 unchanged, or the -1.#IND00 pattern set above */
+ return u2.dbl;
+ }
+
+ /* Check if this is a NaN (bits 52-62 are all 1, bits 0-51 are not all 0).
+ Values with bit 63 set were already handled above. */
+ if (u2.ullx > 0x7FF0000000000000ULL)
+ {
+ /* Set bit 51, the top mantissa bit. That's what the MS function does. */
+ u2.ullx |= 0x8000000000000ULL;
+ return u2.dbl;
+ }
+
+ /* Calculate the square root. */
+#ifdef _MSC_VER
+ /* Another YAY for the MS compiler. There are 2 instructions we could use:
+ SQRTPD (computes sqrt for 2 double values) or SQRTSD (computes sqrt for
+ only the lower 64 bit double value). Obviously we only need 1. And on
+ some architectures SQRTPD is twice as slow as SQRTSD. On the other hand
+ the MS compiler is stupid and always generates an additional MOVAPS
+ instruction when SQRTSD is used. We choose to use SQRTPD here since on
+ modern hardware it's as fast as SQRTSD. */
+ u.x128d = _mm_sqrt_pd(u.x128d); // SQRTPD
+#else
+ u.x128d = _mm_sqrt_sd(u.x128d, u.x128d); // SQRTSD
+#endif
+ /* NOTE(review): m128d_f64 is the MSVC spelling for __m128d element access - confirm the non-MSVC build accepts it */
+ return u.x128d.m128d_f64[0];
+}
Propchange: trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c
------------------------------------------------------------------------------
svn:eol-style = native