That would be a few lines, wouldn't it? Ok, let me do the work for you. And now compile and show me how the loop would be optimized anywhere near the asm code. Or can you do better?
/*
 * DIB_32BPP_ColorFill
 *
 * Fills the rectangle *prcl of the surface *pso with the 32-bit value iColor.
 *
 *   pso    - surface; pvScan0 is used as the address of row 0 and lDelta as
 *            the byte offset from one row to the next.
 *   prcl   - rectangle to fill; right and bottom are treated as exclusive.
 *   iColor - value stored into every pixel (one ULONG per pixel).
 *
 * Always returns TRUE; an empty or inverted rectangle is a no-op.
 */
BOOLEAN DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor)
{
    ULONG cx, cy, x;
    PULONG pulLine;

    /*
     * Reject empty/inverted rectangles by comparing the RECTL members
     * directly. Testing an unsigned difference with "<= 0" would only catch
     * the exactly-zero case and let an inverted rectangle wrap around to a
     * huge count. (Assumes the RECTL members are signed, as in the Windows
     * DDK headers.)
     */
    if (prcl->right <= prcl->left) return TRUE;
    if (prcl->bottom <= prcl->top) return TRUE;

    cx = prcl->right - prcl->left;   /* pixels per row */
    cy = prcl->bottom - prcl->top;   /* number of rows */

    /* First pixel of the first row: row offset in bytes plus 4 bytes per pixel. */
    pulLine = (PULONG)((PCHAR)pso->pvScan0 + prcl->top * pso->lDelta + prcl->left * 4);

    do
    {
        /*
         * Store whole 32-bit pixels. memset() is not usable here: it writes
         * only the low byte of its value argument and counts bytes, not
         * pixels.
         */
        for (x = 0; x < cx; x++)
            pulLine[x] = iColor;

        /*
         * Advance by the stride in bytes. Going through PCHAR keeps the step
         * exact and signed, instead of dividing an unsigned copy of lDelta
         * by 4.
         */
        pulLine = (PULONG)((PCHAR)pulLine + pso->lDelta);
        cy--;
    }
    while (cy > 0);

    return TRUE;
}
Aleksey Bragin schrieb:
"in a few lines" - and what about using the same algorithm you used in this assembly, but without pretending to be a compiler?
WBR, Aleksey.
On Aug 3, 2009, at 7:31 AM, Timo Kreuzer wrote:
I hereby challenge you to provide portable C code, that - compiled with gcc - is faster than this assembly code. Should be done in a few lines.
I bet my ass on it: You will fail! No matter what optimization you choose. You would also fail with msvc or Intel compiler.
Regards, Timo
Alex Ionescu wrote:
The version that GCC 4.4 and CL 15 will generate would be way more optimized than this unportable/slower assembly code. This isn't 1994 anymore. You can't beat the compiler anymore.
Best regards, Alex Ionescu
On Sun, Aug 2, 2009 at 3:31 PM, tkreuzer@svn.reactos.org wrote:
Author: tkreuzer Date: Mon Aug 3 00:31:29 2009 New Revision: 42353
URL: http://svn.reactos.org/svn/reactos?rev=42353&view=rev Log: asm version of DIB_32BPP_ColorFill:
- Add frame pointer
- Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
- Optimize the loop
- Add comments
Modified: trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
Modified: trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s URL: http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/dib...
==============================================================================
--- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s [iso-8859-1] (original)
+++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s [iso-8859-1] Mon Aug 3 00:31:29 2009
@@ -4,78 +4,62 @@
- FILE:
subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
- PURPOSE: ASM optimised 32bpp ColorFill
- PROGRAMMERS: Magnus Olsen
Timo Kreuzer (timo.kreuzer@rectos.org)*/
- .globl _DIB_32BPP_ColorFill
- .intel_syntax noprefix
+.intel_syntax noprefix
- .def _DIB_32BPP_ColorFill;
- .scl 2;
- .type 32;
- .endef
- _DIB_32BPP_ColorFill:
-    sub esp, 24
-    mov ecx, [esp+32]
-    mov [esp+8], ebx
-    mov ebx, [esp+28]
-    mov [esp+20], ebp
-    mov ebp, [esp+36]
-    mov [esp+12], esi
-    mov [esp+16], edi
-    mov edi, [ecx]
-    mov esi, [ecx+8]
-    mov edx, [ebx+36]
-    sub esi, edi
-    mov edi, [ecx+4]
-    mov eax, edi
-    imul eax, edx
-    add eax, [ebx+32]
-    mov ebx, [ecx]
-    lea eax, [eax+ebx*4]
-    mov [esp+4], eax
-    mov eax, [ecx+12]
-    cmp eax, edi
-    jbe end
-    sub eax, edi
-    mov [esp], eax
-    lea esi, [esi+0]
+/*
- BOOLEAN
- _cdecl
- DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
+*/
-for_loop:
-    mov eax, ebp
-    cld
-    mov ebx, esi
-    mov edi, [esp+4]
-    test edi, 3
-    jnz algin_draw
-    mov ecx, esi
-    rep stosd
-    add [esp+4], edx
-    dec dword ptr [esp]
-    jnz for_loop
-end:
-    mov ebx, [esp+8]
-    mov eax, 1
-    mov esi, [esp+12]
-    mov edi, [esp+16]
-    mov ebp, [esp+20]
-    add esp, 24
-    ret
+.globl _DIB_32BPP_ColorFill
+_DIB_32BPP_ColorFill:
+    push ebp
+    mov ebp, esp
+    push ebx
+    push esi
+    push edi
+    sub esp, 4 /* Space for lDelta */
-algin_draw:
-    stosd
-    dec ebx
-    mov ecx, ebx
-    rol eax, 16
-    stosd
-    add [esp+4], edx
-    dec dword ptr [esp]
-    jnz for_loop
+    mov edx, [ebp+12] /* edx = prcl */
+    mov ecx, [ebp+8] /* ecx = pso */
-    mov ebx, [esp+8]
-    mov eax, 1
-    mov esi, [esp+12]
-    mov edi, [esp+16]
-    mov ebp, [esp+20]
-    add esp, 24
-    ret
+    mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */
+    mov [esp], ebx /* lDelta = pso->lDelta; */
+    mov edi, [edx+4] /* edi = prcl->top; */
+    mov eax, edi /* eax = prcl->top; */
+    imul eax, ebx /* eax = prcl->top *pso->lDelta; */
+    add eax, [ecx+0x20] /* eax += pso->pvScan0; */
+    mov ebx, [edx] /* ebx = prcl->left; */
+    lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 *prcl->left; */
+    mov ebx, [edx+8] /* ebx = prcl->right; */
+    sub ebx, [edx] /* ebx = prcl->right -prcl->left; */
+    jbe end /* if (ebx <= 0) goto end; */
+    mov edx, [edx+12] /* edx = prcl->bottom; */
+    sub edx, edi /* edx -= prcl->top; */
+    jbe end /* if (eax <= 0) goto end; */
+    mov eax, [ebp+16] /* eax = iColor; */
+    cld
+for_loop: /* do { */
+    mov edi, esi /* edi = pvLine0; */
+    mov ecx, ebx /* ecx = cx; */
+    rep stosd /* memset(pvLine0, iColor,cx); */
+    add esi, [esp] /* pvLine0 += lDelta; */
+    dec edx /* cy--; */
+    jnz for_loop /* } while (cy > 0); */
+end:
+    mov eax, 1
+    add esp, 4
+    pop edi
+    pop esi
+    pop ebx
+    pop ebp
+    ret
Ros-dev mailing list Ros-dev@reactos.org http://www.reactos.org/mailman/listinfo/ros-dev
Ros-dev mailing list Ros-dev@reactos.org http://www.reactos.org/mailman/listinfo/ros-dev
Ros-dev mailing list Ros-dev@reactos.org http://www.reactos.org/mailman/listinfo/ros-dev