Author: tkreuzer Date: Mon Jan 28 23:44:29 2013 New Revision: 58251
URL: http://svn.reactos.org/svn/reactos?rev=58251&view=rev Log: [CRT] - Remove x64 asm stub for acos from cmake file, since we already have a generic C implementation - Implement sqrt for amd64 in SSE, both in C and asm. While the C version would be sufficient, it's currently less portable due to the lack of mm intrinsics for GCC - Silence a warning
Added: trunk/reactos/lib/sdk/crt/math/amd64/asin.c (with props) trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c (with props) Modified: trunk/reactos/lib/sdk/crt/crt.cmake trunk/reactos/lib/sdk/crt/locale/locale.c trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S
Modified: trunk/reactos/lib/sdk/crt/crt.cmake URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/crt.cmake?rev=5... ============================================================================== --- trunk/reactos/lib/sdk/crt/crt.cmake [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/crt.cmake [iso-8859-1] Mon Jan 28 23:44:29 2013 @@ -433,7 +433,7 @@ float/amd64/getsetfpcw.S float/amd64/fpreset.S float/amd64/logb.S - math/amd64/acos.S + # math/amd64/acos.S math/amd64/acosf.S math/amd64/atan.S math/amd64/atan2.S
Modified: trunk/reactos/lib/sdk/crt/locale/locale.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/locale/locale.c... ============================================================================== --- trunk/reactos/lib/sdk/crt/locale/locale.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/locale/locale.c [iso-8859-1] Mon Jan 28 23:44:29 2013 @@ -1402,6 +1402,7 @@ if(category == LC_ALL) return construct_lc_all(locinfo);
+ _Analysis_assume_(category <= 5); return locinfo->lc_category[category].locale; }
@@ -1481,13 +1482,13 @@ void __init_global_locale() { unsigned i; - + LOCK_LOCALE; /* Someone created it before us */ if(global_locale) return; global_locale = MSVCRT__create_locale(0, "C"); - + __lc_codepage = MSVCRT_locale->locinfo->lc_codepage; MSVCRT___lc_collate_cp = MSVCRT_locale->locinfo->lc_collate_cp; __mb_cur_max = MSVCRT_locale->locinfo->mb_cur_max;
Added: trunk/reactos/lib/sdk/crt/math/amd64/asin.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/asin... ============================================================================== --- trunk/reactos/lib/sdk/crt/math/amd64/asin.c (added) +++ trunk/reactos/lib/sdk/crt/math/amd64/asin.c [iso-8859-1] Mon Jan 28 23:44:29 2013 @@ -1,0 +1,73 @@ +/* + * COPYRIGHT: See COPYING in the top level directory + * PROJECT: ReactOS CRT + * FILE: lib/sdk/crt/math/amd64/asin.c + * PURPOSE: Generic C implementation of arc sine + * PROGRAMMER: Timo Kreuzer (timo.kreuzer@reactos.org) + */ + +#define PRECISION 9 + +/* + * The arc sine can be approximated with the following series: + * + * asin(x) = a0*x + a1*x^3 + a2*x^5 + a3*x^7 + a4*x^9 + ... + * + * To reduce the number of multiplications the formula is transformed to + * + * asin(x) = x * (1 + x^2*(a1 + x^2*(a2 + x^2*(a3 + ...) ) ) ) + * + * The coefficients are: + * a0 = 1 + * a1 = 1/(2*3) + * a2 = (3*1)/(4*2*5) + * a3 = (5*3*1)/(6*4*2*7) + * a4 = (7*5*3*1)/(8*6*4*2*9) + * a5 = (9*7*5*3*1)/(10*8*6*4*2*11) + * ... + */ + +double +asin(double x) +{ + double x2, result; + + /* Check range */ + if ((x > 1.) || (x < -1.)) return NaN; + + /* Calculate the square of x */ + x2 = (x * x); + + /* Start with 0, compiler will optimize this away */ + result = 0; + + result += (15*13*11*9*7*5*3*1./(16*14*12*10*8*6*4*2*17)); + result *= x2; + + result += (13*11*9*7*5*3*1./(14*12*10*8*6*4*2*15)); + result *= x2; + + result += (11*9*7*5*3*1./(12*10*8*6*4*2*13)); + result *= x2; + + result += (9*7*5*3*1./(10*8*6*4*2*11)); + result *= x2; + + result += (7*5*3*1./(8*6*4*2*9)); + result *= x2; + + result += (5*3*1./(6*4*2*7)); + result *= x2; + + result += (3*1./(4*2*5)); + result *= x2; + + result += (1./(2*3)); + result *= x2; + + result += 1.; + result *= x; + + return result; +} +
Propchange: trunk/reactos/lib/sdk/crt/math/amd64/asin.c ------------------------------------------------------------------------------ svn:eol-style = native
Modified: trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/sqrt... ============================================================================== --- trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/math/amd64/sqrt.S [iso-8859-1] Mon Jan 28 23:44:29 2013 @@ -9,14 +9,57 @@ /* INCLUDES ******************************************************************/
#include <asm.inc> -#include <ksamd64.inc>
/* CODE **********************************************************************/ .code64
PUBLIC sqrt sqrt: - UNIMPLEMENTED sqrt + + /* Load the sign bit into rdx */ + mov rdx, HEX(8000000000000000) + + /* Move the lower 64 bits of xmm0 into rax */ + movd rax, xmm0 + + /* Test the sign bit */ + test rax, rdx + + /* If it is set, go to the failure path */ + jnz x_is_negative + + /* x is positive, now check if it is NaN by checking if the unsigned + integer value is larger than the highest valid positive value. */ + mov rcx, 7FF0000000000000h + cmp rax, rcx + ja short x_is_nan + + /* All is well, calculate the sqrt */ + sqrtpd xmm0, xmm0 ret
+x_is_negative: + /* Load failure return value (-1.#IND00) into rcx */ + mov rcx, HEX(0FFF8000000000000) + + /* Check if the parameter was -0.0 */ + cmp rax, rdx + + /* If it was not, load the failure value, otherwise keep -0.0 */ + cmovne rax, rcx + + /* Move the value back into the return register */ + movd xmm0, rax + ret + +x_is_nan: + /* Create a 1.#QNAN0 by setting this bit */ + mov rcx, HEX(8000000000000) + or rax, rcx + + /* Move the value back into the return register */ + movd xmm0, rax + ret + + END
Added: trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/amd64/sqrt... ============================================================================== --- trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c (added) +++ trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c [iso-8859-1] Mon Jan 28 23:44:29 2013 @@ -1,0 +1,77 @@ + +#include <intrin.h> + +double +sqrt ( + double x) +{ + register union + { + __m128d x128d; + __m128i x128i; + } u ; + register union + { + unsigned long long ullx; + double dbl; + } u2; + + /* Set the lower double-precision value of u to x. + All that we want, is that the compiler understands that we have the + function parameter in a register that we can address as an __m128. + Sadly there is no obvious way to do that. If we use the union, VS will + generate code to store xmm0 in memory and then read it into a GPR. + We avoid memory access by using a direct move. But even here we won't + get a simple MOVSD. We can either do: + a) _mm_set_sd: move x into the lower part of an xmm register and zero + out the upper part (XORPD+MOVSD) + b) _mm_set1_pd: move x into the lower and higher part of an xmm register + (MOVSD+UNPCKLPD) + c) _mm_set_pd, which either generates a memory access, when we try to + tell it to keep the upper 64 bits, or generates 2 MOVAPS + UNPCKLPD + We choose a, which is probably the fastest. 
+ */ + u.x128d = _mm_set_sd(x); + + /* Move the contents of the lower 64 bits into a 64-bit GPR using MOVD */ + u2.ullx = _mm_cvtsi128_si64(u.x128i); + + /* Check for negative values */ + if (u2.ullx & 0x8000000000000000ULL) + { + /* Check if this is *really* negative and not just -0.0 */ + if (u2.ullx != 0x8000000000000000ULL) + { + /* Return -1.#IND00 */ + u2.ullx = 0xfff8000000000000ULL; + } + + /* Return what we have */ + return u2.dbl; + } + + /* Check if this is a NaN (bits 52-62 are all 1, bits 0-51 are not all 0) or + negative (bit 63 is 1) */ + if (u2.ullx > 0x7FF0000000000000ULL) + { + /* Set this bit. That's what the MS function does. */ + u2.ullx |= 0x8000000000000ULL; + return u2.dbl; + } + + /* Calculate the square root. */ +#ifdef _MSC_VER + /* Another YAY for the MS compiler. There are 2 instructions we could use: + SQRTPD (computes sqrt for 2 double values) or SQRTSD (computes sqrt for + only the lower 64 bit double value). Obviously we only need 1. And on + some architectures SQRTPD is twice as slow as SQRTSD. On the other hand + the MS compiler is stupid and always generates an additional MOVAPS + instruction when SQRTSD is used. We choose to use SQRTPD here since on + modern hardware it's as fast as SQRTSD. */ + u.x128d = _mm_sqrt_pd(u.x128d); // SQRTPD +#else + u.x128d = _mm_sqrt_sd(u.x128d, u.x128d); // SQRTSD +#endif + + return u.x128d.m128d_f64[0]; +}
Propchange: trunk/reactos/lib/sdk/crt/math/amd64/sqrt.c ------------------------------------------------------------------------------ svn:eol-style = native