The version that GCC 4.4 and CL 15 will generate would be way more optimized
than this unportable/slower assembly code.
This isn't 1994 anymore. You can't beat the compiler anymore.
Best regards,
Alex Ionescu
On Sun, Aug 2, 2009 at 3:31 PM, <tkreuzer(a)svn.reactos.org> wrote:
Author: tkreuzer
Date: Mon Aug 3 00:31:29 2009
New Revision: 42353
URL:
http://svn.reactos.org/svn/reactos?rev=42353&view=rev
Log:
asm version of DIB_32BPP_ColorFill:
- Add frame pointer
- Get rid of algin_draw, 32bpp surfaces must be DWORD aligned
- Optimize the loop
- Add comments
Modified:
trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
Modified:
trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s
URL:
http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/win32/win32k/di…
==============================================================================
--- trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s [iso-8859-1] (original)
+++ trunk/reactos/subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.s [iso-8859-1] Mon Aug 3 00:31:29 2009
@@ -4,78 +4,62 @@
* FILE: subsystems/win32/win32k/dib/i386/dib32bpp_colorfill.c
* PURPOSE: ASM optimised 32bpp ColorFill
* PROGRAMMERS: Magnus Olsen
+ * Timo Kreuzer (timo.kreuzer(a)reactos.org)
*/
- .globl _DIB_32BPP_ColorFill
- .intel_syntax noprefix
+.intel_syntax noprefix
- .def _DIB_32BPP_ColorFill;
- .scl 2;
- .type 32;
- .endef
-
- _DIB_32BPP_ColorFill:
- sub esp, 24
- mov ecx, [esp+32]
- mov [esp+8], ebx
- mov ebx, [esp+28]
- mov [esp+20], ebp
- mov ebp, [esp+36]
- mov [esp+12], esi
- mov [esp+16], edi
- mov edi, [ecx]
- mov esi, [ecx+8]
- mov edx, [ebx+36]
- sub esi, edi
- mov edi, [ecx+4]
- mov eax, edi
- imul eax, edx
- add eax, [ebx+32]
- mov ebx, [ecx]
- lea eax, [eax+ebx*4]
- mov [esp+4], eax
- mov eax, [ecx+12]
- cmp eax, edi
- jbe end
- sub eax, edi
- mov [esp], eax
- lea esi, [esi+0]
+/*
+ * BOOLEAN
+ * _cdecl
+ * DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor);
+*/
- for_loop:
- mov eax, ebp
- cld
- mov ebx, esi
- mov edi, [esp+4]
- test edi, 3
- jnz algin_draw
- mov ecx, esi
- rep stosd
- add [esp+4], edx
- dec dword ptr [esp]
- jnz for_loop
- end:
- mov ebx, [esp+8]
- mov eax, 1
- mov esi, [esp+12]
- mov edi, [esp+16]
- mov ebp, [esp+20]
- add esp, 24
- ret
+.globl _DIB_32BPP_ColorFill
+_DIB_32BPP_ColorFill:
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+ sub esp, 4 /* Space for lDelta */
- algin_draw:
- stosd
- dec ebx
- mov ecx, ebx
- rol eax, 16
- stosd
- add [esp+4], edx
- dec dword ptr [esp]
- jnz for_loop
+ mov edx, [ebp+12] /* edx = prcl */
+ mov ecx, [ebp+8] /* ecx = pso */
- mov ebx, [esp+8]
- mov eax, 1
- mov esi, [esp+12]
- mov edi, [esp+16]
- mov ebp, [esp+20]
- add esp, 24
- ret
+ mov ebx, [ecx+0x24] /* ebx = pso->lDelta; */
+ mov [esp], ebx /* lDelta = pso->lDelta; */
+ mov edi, [edx+4] /* edi = prcl->top; */
+ mov eax, edi /* eax = prcl->top; */
+ imul eax, ebx /* eax = prcl->top * pso->lDelta; */
+ add eax, [ecx+0x20] /* eax += pso->pvScan0; */
+ mov ebx, [edx] /* ebx = prcl->left; */
+ lea esi, [eax+ebx*4] /* esi = pvLine0 = eax + 4 * prcl->left; */
+
+ mov ebx, [edx+8] /* ebx = prcl->right; */
+ sub ebx, [edx] /* ebx = prcl->right - prcl->left; */
+ jbe end /* if (ebx <= 0) goto end; */
+
+ mov edx, [edx+12] /* edx = prcl->bottom; */
+ sub edx, edi /* edx -= prcl->top; */
+ jbe end /* if (edx <= 0) goto end; */
+
+ mov eax, [ebp+16] /* eax = iColor; */
+ cld
+
+for_loop: /* do { */
+ mov edi, esi /* edi = pvLine0; */
+ mov ecx, ebx /* ecx = cx; */
+ rep stosd /* memset(pvLine0, iColor, cx); */
+ add esi, [esp] /* pvLine0 += lDelta; */
+ dec edx /* cy--; */
+ jnz for_loop /* } while (cy > 0); */
+
+end:
+ mov eax, 1
+ add esp, 4
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret