Author: hbelusca
Date: Fri Oct 17 22:08:51 2014
New Revision: 64792
URL:
http://svn.reactos.org/svn/reactos?rev=64792&view=rev
Log:
[NTVDM]
- Use a helper function for copying small chunks of memory (1, 2, 4 and 8 bytes), because
profiling ntvdm showed that chunks of 1 and 2 bytes of memory were the ones read the most,
and calling RtlCopy/MoveMemory to intensively copy 1 or 2 bytes was shown to be
inefficient. We also don't use intrinsics/builtins directly: because the compiler
cannot know in advance the size of the memory to be copied, it cannot perform the required
optimizations. It was checked that using the builtin memcpy or memmove of GCC when
compiling the program in release + full optimization mode just embedded a call to _memcpy,
and naively using the movsX intrinsics of MSVC does not do the job of "moving"
memory while taking possible overlaps into account. Therefore, for small sizes (<= 8
bytes), we use copy assignments, whereas for large sizes (and for 3, 5, 6, 7, 9+ bytes) we
use the regular method of calling RtlMoveMemory. We gain approximately 10% speed with this
optimization.
- Also, I use >> and & instead of the regular / and % operations for dividing by 4 and 2,
because the latter are not optimized by MSVC by default (they are, however,
if you explicitly enable optimizations).
Modified:
trunk/reactos/subsystems/ntvdm/emulator.c
trunk/reactos/subsystems/ntvdm/io.c
Modified: trunk/reactos/subsystems/ntvdm/emulator.c
URL:
http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/ntvdm/emulator.…
==============================================================================
--- trunk/reactos/subsystems/ntvdm/emulator.c [iso-8859-1] (original)
+++ trunk/reactos/subsystems/ntvdm/emulator.c [iso-8859-1] Fri Oct 17 22:08:51 2014
@@ -60,6 +60,75 @@
/* PRIVATE FUNCTIONS **********************************************************/
+static inline VOID
+EmulatorMoveMemory(OUT VOID UNALIGNED *Destination,
+ IN const VOID UNALIGNED *Source,
+ IN SIZE_T Length)
+{
+#if 1
+ /*
+ * Lengths of exactly 1, 2, 4 or 8 bytes constitute the bulk of our moves,
+ * so each of them is copied with a single assignment of that width.
+ * Using RtlMoveMemory for all these small moves would be slow otherwise.
+ */
+ switch (Length)
+ {
+ case 0: // Nothing to move
+ return;
+
+ case sizeof(UCHAR):
+ *(PUCHAR)Destination = *(PUCHAR)Source;
+ return;
+
+ case sizeof(USHORT):
+ *(PUSHORT)Destination = *(PUSHORT)Source;
+ return;
+
+ case sizeof(ULONG):
+ *(PULONG)Destination = *(PULONG)Source;
+ return;
+
+ case sizeof(ULONGLONG):
+ *(PULONGLONG)Destination = *(PULONGLONG)Source;
+ return;
+
+ default: // Any other length: fall back to a generic memory move
+#if defined(__GNUC__)
+ __builtin_memmove(Destination, Source, Length);
+#else
+ RtlMoveMemory(Destination, Source, Length);
+#endif // defined(__GNUC__)
+ }
+
+#else // Disabled by the "#if 1" above: __movsX-based variant, which ignores overlaps
+
+ PUCHAR Dest = (PUCHAR)Destination;
+ PUCHAR Src = (PUCHAR)Source;
+
+ SIZE_T Count, NewSize = Length;
+
+ /* Move as many whole dwords as fit in the length */
+ Count = NewSize >> 2; // NewSize / sizeof(ULONG);
+ NewSize = NewSize & 3; // NewSize % sizeof(ULONG);
+ __movsd(Dest, Src, Count);
+ Dest += Count << 2; // Count * sizeof(ULONG);
+ Src += Count << 2;
+
+ /* Move the remaining word, if any */
+ Count = NewSize >> 1; // NewSize / sizeof(USHORT);
+ NewSize = NewSize & 1; // NewSize % sizeof(USHORT);
+ __movsw(Dest, Src, Count);
+ Dest += Count << 1; // Count * sizeof(USHORT);
+ Src += Count << 1;
+
+ /* Move the remaining byte, if any */
+ Count = NewSize; // NewSize / sizeof(UCHAR);
+ // NewSize = NewSize; // NewSize % sizeof(UCHAR);
+ __movsb(Dest, Src, Count);
+
+#endif
+}
+
VOID WINAPI EmulatorReadMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer, ULONG
Size)
{
UNREFERENCED_PARAMETER(State);
@@ -91,7 +160,7 @@
}
/* Read the data from the virtual address space and store it in the buffer */
- RtlCopyMemory(Buffer, REAL_TO_PHYS(Address), Size);
+ EmulatorMoveMemory(Buffer, REAL_TO_PHYS(Address), Size);
}
VOID WINAPI EmulatorWriteMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer, ULONG
Size)
@@ -112,7 +181,7 @@
if ((Address + Size) >= ROM_AREA_START && (Address < ROM_AREA_END))
return;
/* Read the data from the buffer and store it in the virtual address space */
- RtlCopyMemory(REAL_TO_PHYS(Address), Buffer, Size);
+ EmulatorMoveMemory(REAL_TO_PHYS(Address), Buffer, Size);
/*
* Check if we modified the VGA memory.
Modified: trunk/reactos/subsystems/ntvdm/io.c
URL:
http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/ntvdm/io.c?rev=…
==============================================================================
--- trunk/reactos/subsystems/ntvdm/io.c [iso-8859-1] (original)
+++ trunk/reactos/subsystems/ntvdm/io.c [iso-8859-1] Fri Oct 17 22:08:51 2014
@@ -100,8 +100,7 @@
}
else
{
- while (Count--)
- *Buffer++ = IOReadB(Port);
+ while (Count--) *Buffer++ = IOReadB(Port);
}
}
@@ -196,8 +195,7 @@
}
else
{
- while (Count--)
- *Buffer++ = IOReadW(Port);
+ while (Count--) *Buffer++ = IOReadW(Port);
}
}
@@ -278,8 +276,7 @@
}
else
{
- while (Count--)
- *Buffer++ = IOReadD(Port);
+ while (Count--) *Buffer++ = IOReadD(Port);
}
}
@@ -379,7 +376,7 @@
}
else
{
- PBYTE Address = (PBYTE)Buffer;
+ PUCHAR Address = (PUCHAR)Buffer;
while (DataCount--)
{
@@ -388,8 +385,8 @@
UCHAR NewDataSize = DataSize;
/* Read dword */
- Count = NewDataSize / sizeof(ULONG);
- NewDataSize = NewDataSize % sizeof(ULONG);
+ Count = NewDataSize >> 2; // NewDataSize / sizeof(ULONG);
+ NewDataSize = NewDataSize & 3; // NewDataSize % sizeof(ULONG);
while (Count--)
{
*(PULONG)Address = IOReadD(CurrentPort);
@@ -398,8 +395,8 @@
}
/* Read word */
- Count = NewDataSize / sizeof(USHORT);
- NewDataSize = NewDataSize % sizeof(USHORT);
+ Count = NewDataSize >> 1; // NewDataSize / sizeof(USHORT);
+ NewDataSize = NewDataSize & 1; // NewDataSize % sizeof(USHORT);
while (Count--)
{
*(PUSHORT)Address = IOReadW(CurrentPort);
@@ -408,17 +405,14 @@
}
/* Read byte */
- Count = NewDataSize / sizeof(UCHAR);
- NewDataSize = NewDataSize % sizeof(UCHAR);
+ Count = NewDataSize; // NewDataSize / sizeof(UCHAR);
+ // NewDataSize = NewDataSize % sizeof(UCHAR);
while (Count--)
{
*(PUCHAR)Address = IOReadB(CurrentPort);
CurrentPort += sizeof(UCHAR);
Address += sizeof(UCHAR);
}
-
- ASSERT(Count == 0);
- ASSERT(NewDataSize == 0);
}
}
}
@@ -457,7 +451,7 @@
}
else
{
- PBYTE Address = (PBYTE)Buffer;
+ PUCHAR Address = (PUCHAR)Buffer;
while (DataCount--)
{
@@ -466,8 +460,8 @@
UCHAR NewDataSize = DataSize;
/* Write dword */
- Count = NewDataSize / sizeof(ULONG);
- NewDataSize = NewDataSize % sizeof(ULONG);
+ Count = NewDataSize >> 2; // NewDataSize / sizeof(ULONG);
+ NewDataSize = NewDataSize & 3; // NewDataSize % sizeof(ULONG);
while (Count--)
{
IOWriteD(CurrentPort, *(PULONG)Address);
@@ -476,8 +470,8 @@
}
/* Write word */
- Count = NewDataSize / sizeof(USHORT);
- NewDataSize = NewDataSize % sizeof(USHORT);
+ Count = NewDataSize >> 1; // NewDataSize / sizeof(USHORT);
+ NewDataSize = NewDataSize & 1; // NewDataSize % sizeof(USHORT);
while (Count--)
{
IOWriteW(CurrentPort, *(PUSHORT)Address);
@@ -486,17 +480,14 @@
}
/* Write byte */
- Count = NewDataSize / sizeof(UCHAR);
- NewDataSize = NewDataSize % sizeof(UCHAR);
+ Count = NewDataSize; // NewDataSize / sizeof(UCHAR);
+ // NewDataSize = NewDataSize % sizeof(UCHAR);
while (Count--)
{
IOWriteB(CurrentPort, *(PUCHAR)Address);
CurrentPort += sizeof(UCHAR);
Address += sizeof(UCHAR);
}
-
- ASSERT(Count == 0);
- ASSERT(NewDataSize == 0);
}
}
}