Speed up 24bpp fills (both small fills and larger fills)
Modified: trunk/reactos/subsys/win32k/dib/dib24bpp.c
_____
Modified: trunk/reactos/subsys/win32k/dib/dib24bpp.c
--- trunk/reactos/subsys/win32k/dib/dib24bpp.c 2005-06-11 12:13:28 UTC (rev 15858)
+++ trunk/reactos/subsys/win32k/dib/dib24bpp.c 2005-06-11 13:36:00 UTC (rev 15859)
@@ -392,11 +392,97 @@
{
ULONG DestY;
+#ifdef _M_IX86
+ PBYTE xaddr = DestSurface->pvScan0 + DestRect->top *
DestSurface->lDelta + (DestRect->left << 1) + DestRect->left;
+ PBYTE addr;
+ ULONG Count;
+ ULONG xCount=DestRect->right - DestRect->left;
+
for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
+ {
+ Count = xCount;
+ addr = xaddr;
+ xaddr = (PBYTE)((ULONG_PTR)addr + DestSurface->lDelta);
+
+ if (Count < 8)
+ {
+ /* For small fills, don't bother doing anything fancy */
+ while (Count--)
+ {
+ *(PUSHORT)(addr) = color;
+ addr += 2;
+ *(addr) = color >> 16;
+ addr += 1;
+ }
+ }
+ else
+ {
+ /* Align to 4-byte address */
+ while (0 != ((ULONG_PTR) addr & 0x3))
+ {
+ *(PUSHORT)(addr) = color;
+ addr += 2;
+ *(addr) = color >> 16;
+ addr += 1;
+ Count--;
+ }
+ /* If the color we need to fill with is 0ABC, then the final mem pattern
+ * (note little-endianness) would be:
+ *
+ * |C.B.A|C.B.A|C.B.A|C.B.A| <- pixel borders
+ * |C.B.A.C|B.A.C.B|A.C.B.A| <- ULONG borders
+ *
+ * So, taking endianness into account again, we need to fill with these
+ * ULONGs: CABC BCAB ABCA */
+
+ /* This is about 30% faster than the generic C code below */
+ __asm__ __volatile__ (
+" movl %1, %%ecx\n"
+" andl $0xffffff, %%ecx\n" /* 0ABC */
+" movl %%ecx, %%ebx\n" /* Construct BCAB in ebx */
+" shrl $8, %%ebx\n"
+" movl %%ecx, %%eax\n"
+" shll $16, %%eax\n"
+" orl %%eax, %%ebx\n"
+" movl %%ecx, %%edx\n" /* Construct ABCA in edx */
+" shll $8, %%edx\n"
+" movl %%ecx, %%eax\n"
+" shrl $16, %%eax\n"
+" orl %%eax, %%edx\n"
+" movl %%ecx, %%eax\n" /* Construct CABC in eax */
+" shll $24, %%eax\n"
+" orl %%ecx, %%eax\n"
+" movl %2, %%ecx\n" /* Load count */
+" shr $2, %%ecx\n"
+" movl %3, %%edi\n" /* Load dest */
+".FL1:\n"
+" movl %%eax, (%%edi)\n" /* Store 4 pixels, 12 bytes */
+" movl %%ebx, 4(%%edi)\n"
+" movl %%edx, 8(%%edi)\n"
+" addl $12, %%edi\n"
+" dec %%ecx\n"
+" jnz .FL1\n"
+" movl %%edi, %0\n"
+ : "=m"(addr)
+ : "m"(color), "m"(Count), "m"(addr)
+ : "%eax", "%ebx", "%ecx", "%edx",
"%edi");
+ Count = Count & 0x03;
+ while (0 != Count--)
+ {
+ *(PUSHORT)(addr) = color;
+ addr += 2;
+ *(addr) = color >> 16;
+ addr += 1;
+ }
+ }
+ }
+#else
+
+ for (DestY = DestRect->top; DestY< DestRect->bottom; DestY++)
{
DIB_24BPP_HLine(DestSurface, DestRect->left, DestRect->right,
DestY, color);
}
-
+#endif
return TRUE;
}
Show replies by date