Author: tkreuzer Date: Sat Mar 20 01:49:30 2010 New Revision: 46283
URL: http://svn.reactos.org/svn/reactos?rev=46283&view=rev Log: [CRT] Import x87 asm version of pow() from glibc. Replaces our broken implementation, fixing a bunch of winetests (oleaut32 vartest)
Modified: trunk/reactos/lib/sdk/crt/math/i386/pow_asm.s
Modified: trunk/reactos/lib/sdk/crt/math/i386/pow_asm.s URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/math/i386/pow_a... ============================================================================== --- trunk/reactos/lib/sdk/crt/math/i386/pow_asm.s [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/math/i386/pow_asm.s [iso-8859-1] Sat Mar 20 01:49:30 2010 @@ -1,121 +1,368 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS kernel - * PURPOSE: Run-Time Library - * FILE: lib/rtl/i386/pow.S - * PROGRAMER: Alex Ionescu (alex@relsoft.net) - * - * Copyright (C) 2002 Michael Ringgaard. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES// LOSS OF USE, DATA, OR PROFITS// OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -.globl _pow - - /* DATA ********************************************************************/ - -fzero: - .long 0 // Floating point zero - .long 0 // Floating point zero - -.intel_syntax noprefix - -/* FUNCTIONS ***************************************************************/ - +/* ix87 specific implementation of pow function. + Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004, 2005, 2007 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper drepper@cygnus.com, 1996. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Reacros modifications */ +#define ALIGNARG(log2) log2 +#define ASM_TYPE_DIRECTIVE(name,typearg) +#define ASM_SIZE_DIRECTIVE(name) +#define cfi_adjust_cfa_offset(x) +#define ENTRY(x) +#define END(x) +.global _pow + + .text + + .align ALIGNARG(4) + ASM_TYPE_DIRECTIVE(infinity,@object) +inf_zero: +infinity: + .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x7f + ASM_SIZE_DIRECTIVE(infinity) + ASM_TYPE_DIRECTIVE(zero,@object) +zero: .double 0.0 + ASM_SIZE_DIRECTIVE(zero) + ASM_TYPE_DIRECTIVE(minf_mzero,@object) +minf_mzero: +minfinity: + .byte 0, 0, 0, 0, 0, 0, 0xf0, 0xff +mzero: + .byte 0, 0, 0, 0, 0, 0, 0, 0x80 + ASM_SIZE_DIRECTIVE(minf_mzero) + ASM_TYPE_DIRECTIVE(one,@object) +one: .double 1.0 + ASM_SIZE_DIRECTIVE(one) + ASM_TYPE_DIRECTIVE(limit,@object) +limit: .double 0.29 + ASM_SIZE_DIRECTIVE(limit) + ASM_TYPE_DIRECTIVE(p63,@object) +p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43 + ASM_SIZE_DIRECTIVE(p63) + +#ifdef PIC +#define MO(op) op##@GOTOFF(%ecx) +#define MOX(op,x,f) op##@GOTOFF(%ecx,x,f) +#else +#define MO(op) op +#define MOX(op,x,f) op(,x,f) +#endif + + .text _pow: - push ebp - mov ebp,esp - sub esp,12 // Allocate temporary space - push edi // Save register edi - push eax // Save register eax - mov dword ptr [ebp-12],0 // Set negation flag to zero - fld qword ptr [ebp+16] // Load real from stack - fld qword ptr [ebp+8] // Load real from stack - mov edi,offset flat:fzero // Point to real zero - fcom qword ptr [edi] // Compare x with zero - fstsw ax // Get the FPU status word - mov al,ah // Move condition flags to AL - lahf // Load Flags into AH - and al, 0b01000101 // Isolate C0, C2 and C3 - and ah, 0b10111010 // Turn off CF, PF and ZF - or ah,al // Set new CF, PF and ZF - sahf // Store AH into Flags - jb __fpow1 // Re-direct if x < 0 - ja __fpow2 // Re-direct if x > 0 - fxch // Swap st, st(1) - fcom qword ptr [edi] // Compare y with zero - fxch // Restore x as top of stack - fstsw ax // Get the FPU status word - mov al,ah // Move condition flags to AL - lahf // Load Flags into AH - and al, 0b01000101 // Isolate C0, C2 and C3 - and ah, 0b10111010 // Turn off CF, PF and ZF - or ah,al // Set new CF, PF and ZF - sahf // Store AH into Flags - jmp __fpow2 // Re-direct -__fpow1: fxch // Put y on top of stack - fld st // Duplicate y as st(1) - frndint // Round to integer - fxch // Put y on top of stack - fcomp // y = int(y) ? - fstsw ax // Get the FPU status word - mov al,ah // Move condition flags to AL - lahf // Load Flags into AH - and al, 0b01000101 // Isolate C0, C2 and C3 - and ah, 0b10111010 // Turn off CF, PF and ZF - or ah,al // Set new CF, PF and ZF - sahf // Store AH into Flags - jne __fpow4 // Proceed if y = int(y) - fist dword ptr [ebp-12] // Store y as integer - and dword ptr [ebp-12],1 // Set bit if y is odd - fxch // Put x on top of stack - fabs // x = |x| -__fpow2: fldln2 // Load log base e of 2 - fxch st(1) // Exchange st, st(1) - fyl2x // Compute the natural log(x) - fmulp // Compute y * ln(x) - fldl2e // Load log base 2(e) - fmulp st(1),st // Multiply x * log base 2(e) - fst st(1) // Push result - frndint // Round to integer - fsub st(1),st // Subtract - fxch // Exchange st, st(1) - f2xm1 // Compute 2 to the (x - 1) - fld1 // Load real number 1 - faddp // 2 to the x - fscale // Scale by power of 2 - fstp st(1) // Set new stack top and pop - test dword ptr [ebp-12],1 // Negation required ? - jz __fpow3 // No, re-direct - fchs // Negate the result -__fpow3: fstp qword ptr [ebp-8] // Save (double)pow(x, y) - fld qword ptr [ebp-8] // Load (double)pow(x, y) -__fpow4: pop eax // Restore register eax - pop edi // Restore register edi - mov esp,ebp // Deallocate temporary space - pop ebp - ret +ENTRY(__ieee754_pow) + fldl 12(%esp) // y + fxam + +#ifdef PIC + LOAD_PIC_REG (cx) +#endif + + fnstsw + movb %ah, %dl + andb $0x45, %ah + cmpb $0x40, %ah // is y == 0 ? + je 11f + + cmpb $0x05, %ah // is y == ±inf ? + je 12f + + cmpb $0x01, %ah // is y == NaN ? + je 30f + + fldl 4(%esp) // x : y + + subl $8,%esp + cfi_adjust_cfa_offset (8) + + fxam + fnstsw + movb %ah, %dh + andb $0x45, %ah + cmpb $0x40, %ah + je 20f // x is ±0 + + cmpb $0x05, %ah + je 15f // x is ±inf + + fxch // y : x + + /* fistpll raises invalid exception for |y| >= 1L<<63. */ + fld %st // y : y : x + fabs // |y| : y : x + fcompl MO(p63) // y : x + fnstsw + sahf + jnc 2f + + /* First see whether `y' is a natural number. In this case we + can use a more precise algorithm. */ + fld %st // y : y : x + fistpll (%esp) // y : x + fildll (%esp) // int(y) : y : x + fucomp %st(1) // y : x + fnstsw + sahf + jne 2f + + /* OK, we have an integer value for y. */ + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + orl $0, %edx + fstp %st(0) // x + jns 4f // y >= 0, jump + fdivrl MO(one) // 1/x (now referred to as x) + negl %eax + adcl $0, %edx + negl %edx +4: fldl MO(one) // 1 : x + fxch + +6: shrdl $1, %edx, %eax + jnc 5f + fxch + fmul %st(1) // x : ST*x + fxch +5: fmul %st(0), %st // x*x : ST*x + shrl $1, %edx + movl %eax, %ecx + orl %edx, %ecx + jnz 6b + fstp %st(0) // ST*x + ret + + /* y is ±NAN */ +30: fldl 4(%esp) // x : y + fldl MO(one) // 1.0 : x : y + fucomp %st(1) // x : y + fnstsw + sahf + je 31f + fxch // y : x +31: fstp %st(1) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) +2: /* y is a real number. */ + fxch // x : y + fldl MO(one) // 1.0 : x : y + fldl MO(limit) // 0.29 : 1.0 : x : y + fld %st(2) // x : 0.29 : 1.0 : x : y + fsub %st(2) // x-1 : 0.29 : 1.0 : x : y + fabs // |x-1| : 0.29 : 1.0 : x : y + fucompp // 1.0 : x : y + fnstsw + fxch // x : 1.0 : y + sahf + ja 7f + fsub %st(1) // x-1 : 1.0 : y + fyl2xp1 // log2(x) : y + jmp 8f + +7: fyl2x // log2(x) : y +8: fmul %st(1) // y*log2(x) : y + fst %st(1) // y*log2(x) : y*log2(x) + frndint // int(y*log2(x)) : y*log2(x) + fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x)) + fxch // fract(y*log2(x)) : int(y*log2(x)) + f2xm1 // 2^fract(y*log2(x))-1 : int(y*log2(x)) + faddl MO(one) // 2^fract(y*log2(x)) : int(y*log2(x)) + fscale // 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x)) + addl $8, %esp + cfi_adjust_cfa_offset (-8) + fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x)) + ret + + + // pow(x,±0) = 1 + .align ALIGNARG(4) +11: fstp %st(0) // pop y + fldl MO(one) + ret + + // y == ±inf + .align ALIGNARG(4) +12: fstp %st(0) // pop y + fldl MO(one) // 1 + fldl 4(%esp) // x : 1 + fabs // abs(x) : 1 + fucompp // < 1, == 1, or > 1 + fnstsw + andb $0x45, %ah + cmpb $0x45, %ah + je 13f // jump if x is NaN + + cmpb $0x40, %ah + je 14f // jump if |x| == 1 + + shlb $1, %ah + xorb %ah, %dl + andl $2, %edx + fldl MOX(inf_zero, %edx, 4) + ret + + .align ALIGNARG(4) +14: fldl MO(one) + ret + + .align ALIGNARG(4) +13: fldl 4(%esp) // load x == NaN + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) + // x is ±inf +15: fstp %st(0) // y + testb $2, %dh + jz 16f // jump if x == +inf + + // We must find out whether y is an odd integer. + fld %st // y : y + fistpll (%esp) // y + fildll (%esp) // int(y) : y + fucompp // <empty> + fnstsw + sahf + jne 17f + + // OK, the value is an integer, but is the number of bits small + // enough so that all are coming from the mantissa? + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + andb $1, %al + jz 18f // jump if not odd + movl %edx, %eax + orl %edx, %edx + jns 155f + negl %eax +155: cmpl $0x00200000, %eax + ja 18f // does not fit in mantissa bits + // It's an odd integer. + shrl $31, %edx + fldl MOX(minf_mzero, %edx, 8) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) +16: fcompl MO(zero) + addl $8, %esp + cfi_adjust_cfa_offset (-8) + fnstsw + shrl $5, %eax + andl $8, %eax + fldl MOX(inf_zero, %eax, 1) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) +17: shll $30, %edx // sign bit for y in right position + addl $8, %esp + cfi_adjust_cfa_offset (-8) +18: shrl $31, %edx + fldl MOX(inf_zero, %edx, 8) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) + // x is ±0 +20: fstp %st(0) // y + testb $2, %dl + jz 21f // y > 0 + + // x is ±0 and y is < 0. We must find out whether y is an odd integer. + testb $2, %dh + jz 25f + + fld %st // y : y + fistpll (%esp) // y + fildll (%esp) // int(y) : y + fucompp // <empty> + fnstsw + sahf + jne 26f + + // OK, the value is an integer, but is the number of bits small + // enough so that all are coming from the mantissa? + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + andb $1, %al + jz 27f // jump if not odd + cmpl $0xffe00000, %edx + jbe 27f // does not fit in mantissa bits + // It's an odd integer. + // Raise divide-by-zero exception and get minus infinity value. + fldl MO(one) + fdivl MO(zero) + fchs + ret + + cfi_adjust_cfa_offset (8) +25: fstp %st(0) +26: addl $8, %esp + cfi_adjust_cfa_offset (-8) +27: // Raise divide-by-zero exception and get infinity value. + fldl MO(one) + fdivl MO(zero) + ret + + cfi_adjust_cfa_offset (8) + .align ALIGNARG(4) + // x is ±0 and y is > 0. We must find out whether y is an odd integer. +21: testb $2, %dh + jz 22f + + fld %st // y : y + fistpll (%esp) // y + fildll (%esp) // int(y) : y + fucompp // <empty> + fnstsw + sahf + jne 23f + + // OK, the value is an integer, but is the number of bits small + // enough so that all are coming from the mantissa? + popl %eax + cfi_adjust_cfa_offset (-4) + popl %edx + cfi_adjust_cfa_offset (-4) + andb $1, %al + jz 24f // jump if not odd + cmpl $0xffe00000, %edx + jae 24f // does not fit in mantissa bits + // It's an odd integer. + fldl MO(mzero) + ret + + cfi_adjust_cfa_offset (8) +22: fstp %st(0) +23: addl $8, %esp // Don't use 2 x pop + cfi_adjust_cfa_offset (-8) +24: fldl MO(zero) + ret + +END(__ieee754_pow) + +