Add an inline-assembly (x86) implementation of DIB_16BPP_VLine. It gives almost no performance increase, but could be useful later anyway.
Modified: trunk/reactos/subsys/win32k/dib/dib16bpp.c

Modified: trunk/reactos/subsys/win32k/dib/dib16bpp.c
--- trunk/reactos/subsys/win32k/dib/dib16bpp.c	2005-06-07 13:53:37 UTC (rev 15825)
+++ trunk/reactos/subsys/win32k/dib/dib16bpp.c	2005-06-07 15:09:02 UTC (rev 15826)
@@ -90,7 +90,39 @@
 
 VOID
 DIB_16BPP_VLine(SURFOBJ *SurfObj, LONG x, LONG y1, LONG y2, ULONG c)
-{
+{ /* 2610-2700 */
+#ifdef _M_IX86
+  asm volatile(
+    "   testl %2, %2"       "\n\t"
+    "   jle   2f"           "\n\t"
+    "   movl  %2, %%ecx"    "\n\t"
+    "   shr   $2, %2"       "\n\t"
+    "   andl  $3, %%ecx"    "\n\t"
+    "   jz    1f"           "\n\t"
+    "0:"                    "\n\t"
+    "   movw  %%ax, (%0)"   "\n\t"
+    "   addl  %1, %0"       "\n\t"
+    "   decl  %%ecx"        "\n\t"
+    "   jnz   0b"           "\n\t"
+    "   testl %2, %2"       "\n\t"
+    "   jz    2f"           "\n\t"
+    "1:"                    "\n\t"
+    "   movw  %%ax, (%0)"   "\n\t"
+    "   addl  %1, %0"       "\n\t"
+    "   movw  %%ax, (%0)"   "\n\t"
+    "   addl  %1, %0"       "\n\t"
+    "   movw  %%ax, (%0)"   "\n\t"
+    "   addl  %1, %0"       "\n\t"
+    "   movw  %%ax, (%0)"   "\n\t"
+    "   addl  %1, %0"       "\n\t"
+    "   decl  %2"           "\n\t"
+    "   jnz   1b"           "\n\t"
+    "2:"                    "\n\t"
+    : /* no output */
+    : "r"(SurfObj->pvScan0 + (y1 * SurfObj->lDelta) + (x * sizeof (WORD))), 
+      "r"(SurfObj->lDelta), "r"(y2 - y1), "a"(c)
+    : "cc", "memory", "%ecx");
+#else
   PBYTE byteaddr = SurfObj->pvScan0 + y1 * SurfObj->lDelta;
   PWORD addr = (PWORD)byteaddr + x;
   LONG lDelta = SurfObj->lDelta;
@@ -102,6 +134,7 @@
     byteaddr += lDelta;
     addr = (PWORD)byteaddr;
   }
+#endif
 }
 
 BOOLEAN