Below is my C code, based on the C code previously shown here, and the assembly
generated by VC.
This function, like most, does not benefit much from asm coding, although
some cycles can be saved, most notably inside the loop (a compare and an
additional branch in the VC-generated code).
Some algorithms can benefit a lot from asm, though. Examples are the Fletcher
checksum, or incrementing/decrementing variables larger than the register
size, where using the carry flag can save many cycles. A function whose
execution time is very critical may also deserve asm coding, but I think in
this case it is not worth it, as the saving in percentage terms is tiny (any
compiler I know will use rep stosd for the inner loop, which has the largest
weight in the total time).
/*
 * Fill the rectangle described by prcl on the surface pso with iColor,
 * writing one ULONG per pixel (x offsets are scaled by 4 bytes).
 *
 * pso    - surface; pvScan0 is used as the base address and lDelta as the
 *          byte distance between consecutive scan lines.
 * prcl   - rectangle to fill; columns [left, right) and rows [top, bottom).
 * iColor - value stored into every pixel of the rectangle.
 *
 * Always returns TRUE. A rectangle whose width or height is <= 0 is a no-op.
 */
BOOLEAN DIB_32BPP_ColorFill(SURFOBJ* pso, RECTL* prcl, ULONG iColor)
{
    LONG stride = pso->lDelta;
    LONG width  = prcl->right - prcl->left;
    LONG height = prcl->bottom - prcl->top;
    char *row;
    LONG y;

    /* Nothing to draw for an empty or inverted rectangle. */
    if (width <= 0 || height <= 0)
    {
        return TRUE;
    }

    /* Address of the first pixel: top scan line, left column (4 bytes/pixel). */
    row = (char *)pso->pvScan0 + prcl->top * stride + (prcl->left << 2);

    for (y = 0; y < height; ++y, row += stride)
    {
        ULONG *pixel  = (ULONG *)row;
        ULONG *rowEnd = pixel + width;

        /* Store iColor into each of the `width` pixels of this scan line. */
        while (pixel != rowEnd)
        {
            *pixel++ = iColor;
        }
    }

    return TRUE;
}
; MSVC assembly listing (32-bit x86, MASM syntax) for DIB_32BPP_ColorFill.
; The "; Line NN" markers are the compiler's references to lines of the
; C source file it compiled.
; NOTE(review): the bare "DIB_32BPP_ColorFill" lines appear to be the tails of
; the ';' comments on the lines just before them, wrapped in transit - confirm
; before feeding this listing to an assembler.
; NOTE(review): every operand that should come from pso/prcl is an absolute
; address (ds:0 .. ds:36) and the fill value is zeroed with xor, which suggests
; the compiler propagated constant 0 arguments into this copy - confirm.
PUBLIC ?DIB_32BPP_ColorFill@@YAEPAU_SURFOBJ@@PAU_RECTL@@K@Z ;
DIB_32BPP_ColorFill
; Function compile flags: /Ogtpy
_TEXT SEGMENT
?DIB_32BPP_ColorFill@@YAEPAU_SURFOBJ@@PAU_RECTL@@K@Z PROC ;
DIB_32BPP_ColorFill
; Line 52
; ecx = dword at ds:4 (presumably prcl->top - verify against RECTL layout)
mov ecx, DWORD PTR ds:4
; Line 54
; edx = dword at ds:8 (presumably prcl->right)
mov edx, DWORD PTR ds:8
push ebp
; ebp = dword at ds:36; it is added to esi once per row below, i.e. it plays
; the role of lDelta in the C source
mov ebp, DWORD PTR ds:36
; ecx = ecx * ebp  (top * lDelta in the C source)
imul ecx, ebp
; eax = dword at address 0 (presumably prcl->left)
xor eax, eax
mov eax, DWORD PTR [eax]
push esi
; esi = ecx + eax*4 + dword at ds:32  (pulLine in the C source)
lea esi, DWORD PTR [ecx+eax*4]
add esi, DWORD PTR ds:32
; edx = edx - eax  (cx in the C source)
sub edx, eax
; Line 55
test edx, edx
; Line 56
; cx <= 0: skip to the epilogue; only ebp and esi have been pushed so far
jle SHORT $LN22@DIB_32BPP_
push ebx
; Line 58
; ebx = dword at ds:12 - dword at ds:4  (cy in the C source)
mov ebx, DWORD PTR ds:12
sub ebx, DWORD PTR ds:4
; Line 59
test ebx, ebx
; Line 60
; cy <= 0: skip to the epilogue; ebx is popped there, edi was never pushed
jle SHORT $LN21@DIB_32BPP_
push edi
; npad is MSVC's listing macro for padding bytes placed before the loop label
npad 4
; Outer loop: one iteration per row; ebx counts the rows still to be filled
$LL18@DIB_32BPP_:
; Line 64
dec ebx
; Line 66
; edx was already checked to be > 0 above and is not written inside the loop,
; so this test/je pair is the extra compare and branch the message refers to
test edx, edx
je SHORT $LN2@DIB_32BPP_
; rep stosd stores eax to [edi] ecx times: ecx = pixel count, edi = line start
mov ecx, edx
; the stored value is 0 here rather than a loaded iColor argument
xor eax, eax
mov edi, esi
rep stosd
$LN2@DIB_32BPP_:
; advance the line pointer by the stride held in ebp
add esi, ebp
test ebx, ebx
jne SHORT $LL18@DIB_32BPP_
; Epilogue: registers are popped in reverse push order; the early exits above
; join at the label that matches how many registers they had pushed
pop edi
$LN21@DIB_32BPP_:
pop ebx
$LN22@DIB_32BPP_:
pop esi
; Line 72
; return value 1 in al (return TRUE in the C source)
mov al, 1
pop ebp
; Line 73
; ret 0 removes no argument bytes from the stack
ret 0
?DIB_32BPP_ColorFill@@YAEPAU_SURFOBJ@@PAU_RECTL@@K@Z ENDP ;
DIB_32BPP_ColorFill
In asm I would write the loop as:
; Hand-written fill loop (32-bit x86, MASM syntax).
; In:    iColor  = 32-bit value to store in every pixel
;        pulLine = address of the first pixel of the first scan line
;        cy      = number of scan lines, must be > 0 on entry
;        _cx     = number of pixels per scan line, must be > 0 on entry
;        lDelta  = byte distance between consecutive scan lines
; Clobb: eax, ebx, ecx, edx, edi, flags
; Assumes the direction flag is clear so that stosd moves forward.
; Full 32-bit registers are used throughout: the 16-bit forms (di, cx, dx)
; would truncate the pointer and counts and leave stale upper halves in the
; registers that rep stosd actually reads.
        mov     eax, iColor             ; eax = value written by stosd
        mov     ebx, pulLine            ; ebx = start of the current scan line
        mov     edx, cy                 ; edx = scan lines still to fill
L1:
        mov     edi, ebx                ; destination = start of this line
        mov     ecx, _cx                ; ecx = pixels to store on this line
        rep stosd                       ; store eax to [edi], ecx times
        add     ebx, lDelta             ; step the line pointer, not the counter
        dec     edx                     ; one scan line done
        jnz     L1                      ; loop until no lines remain
Jose Catena
DIGIWAVES S.L.