Author: hbelusca Date: Fri Oct 17 22:08:51 2014 New Revision: 64792
URL: http://svn.reactos.org/svn/reactos?rev=64792&view=rev Log: [NTVDM] - Use a helper function for copying small chunks of memory (1, 2, 4 and 8 bytes), because profiling ntvdm showed that 1- and 2-byte accesses are by far the most frequent, and that calling RtlCopy/MoveMemory to copy just 1 or 2 bytes at such a high rate is inefficient. We also don't use intrinsics/builtins directly: since the compiler cannot know in advance the size of the memory to be copied, it cannot perform the required optimizations. It was checked that GCC's builtin memcpy or memmove, when compiling the program in release + full optimization mode, just emitted a call to _memcpy, and that naively using the movsX intrinsics of MSVC does not do the job of "moving" memory, i.e. it does not take possible overlaps into account. Therefore, for the small power-of-two sizes (1, 2, 4 and 8 bytes) we use copy assignments, whereas for all other sizes (3, 5, 6, 7 and 9+ bytes) we use the regular method of calling RtlMoveMemory. We gain roughly 10% speed with this optimization. - Also, I use >> and & instead of the regular / and % operations for dividing by 4 and 2 (and taking the corresponding remainders), because MSVC does not optimize the latter by default (it does, however, if you explicitly enable optimizations).
Modified: trunk/reactos/subsystems/ntvdm/emulator.c trunk/reactos/subsystems/ntvdm/io.c
Modified: trunk/reactos/subsystems/ntvdm/emulator.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/ntvdm/emulator.c... ============================================================================== --- trunk/reactos/subsystems/ntvdm/emulator.c [iso-8859-1] (original) +++ trunk/reactos/subsystems/ntvdm/emulator.c [iso-8859-1] Fri Oct 17 22:08:51 2014 @@ -60,6 +60,75 @@
/* PRIVATE FUNCTIONS **********************************************************/
+static inline VOID +EmulatorMoveMemory(OUT VOID UNALIGNED *Destination, + IN const VOID UNALIGNED *Source, + IN SIZE_T Length) +{ +#if 1 + /* + * We use a switch here to detect small moves of memory, as these + * constitute the bulk of our moves. + * Using RtlMoveMemory for all these small moves would be slow otherwise. + */ + switch (Length) + { + case 0: + return; + + case sizeof(UCHAR): + *(PUCHAR)Destination = *(PUCHAR)Source; + return; + + case sizeof(USHORT): + *(PUSHORT)Destination = *(PUSHORT)Source; + return; + + case sizeof(ULONG): + *(PULONG)Destination = *(PULONG)Source; + return; + + case sizeof(ULONGLONG): + *(PULONGLONG)Destination = *(PULONGLONG)Source; + return; + + default: +#if defined(__GNUC__) + __builtin_memmove(Destination, Source, Length); +#else + RtlMoveMemory(Destination, Source, Length); +#endif + } + +#else // defined(_MSC_VER) + + PUCHAR Dest = (PUCHAR)Destination; + PUCHAR Src = (PUCHAR)Source; + + SIZE_T Count, NewSize = Length; + + /* Move dword */ + Count = NewSize >> 2; // NewSize / sizeof(ULONG); + NewSize = NewSize & 3; // NewSize % sizeof(ULONG); + __movsd(Dest, Src, Count); + Dest += Count << 2; // Count * sizeof(ULONG); + Src += Count << 2; + + /* Move word */ + Count = NewSize >> 1; // NewSize / sizeof(USHORT); + NewSize = NewSize & 1; // NewSize % sizeof(USHORT); + __movsw(Dest, Src, Count); + Dest += Count << 1; // Count * sizeof(USHORT); + Src += Count << 1; + + /* Move byte */ + Count = NewSize; // NewSize / sizeof(UCHAR); + // NewSize = NewSize; // NewSize % sizeof(UCHAR); + __movsb(Dest, Src, Count); + +#endif +} + VOID WINAPI EmulatorReadMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer, ULONG Size) { UNREFERENCED_PARAMETER(State); @@ -91,7 +160,7 @@ }
/* Read the data from the virtual address space and store it in the buffer */ - RtlCopyMemory(Buffer, REAL_TO_PHYS(Address), Size); + EmulatorMoveMemory(Buffer, REAL_TO_PHYS(Address), Size); }
VOID WINAPI EmulatorWriteMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer, ULONG Size) @@ -112,7 +181,7 @@ if ((Address + Size) >= ROM_AREA_START && (Address < ROM_AREA_END)) return;
/* Read the data from the buffer and store it in the virtual address space */ - RtlCopyMemory(REAL_TO_PHYS(Address), Buffer, Size); + EmulatorMoveMemory(REAL_TO_PHYS(Address), Buffer, Size);
/* * Check if we modified the VGA memory.
Modified: trunk/reactos/subsystems/ntvdm/io.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/ntvdm/io.c?rev=6... ============================================================================== --- trunk/reactos/subsystems/ntvdm/io.c [iso-8859-1] (original) +++ trunk/reactos/subsystems/ntvdm/io.c [iso-8859-1] Fri Oct 17 22:08:51 2014 @@ -100,8 +100,7 @@ } else { - while (Count--) - *Buffer++ = IOReadB(Port); + while (Count--) *Buffer++ = IOReadB(Port); } }
@@ -196,8 +195,7 @@ } else { - while (Count--) - *Buffer++ = IOReadW(Port); + while (Count--) *Buffer++ = IOReadW(Port); } }
@@ -278,8 +276,7 @@ } else { - while (Count--) - *Buffer++ = IOReadD(Port); + while (Count--) *Buffer++ = IOReadD(Port); } }
@@ -379,7 +376,7 @@ } else { - PBYTE Address = (PBYTE)Buffer; + PUCHAR Address = (PUCHAR)Buffer;
while (DataCount--) { @@ -388,8 +385,8 @@ UCHAR NewDataSize = DataSize;
/* Read dword */ - Count = NewDataSize / sizeof(ULONG); - NewDataSize = NewDataSize % sizeof(ULONG); + Count = NewDataSize >> 2; // NewDataSize / sizeof(ULONG); + NewDataSize = NewDataSize & 3; // NewDataSize % sizeof(ULONG); while (Count--) { *(PULONG)Address = IOReadD(CurrentPort); @@ -398,8 +395,8 @@ }
/* Read word */ - Count = NewDataSize / sizeof(USHORT); - NewDataSize = NewDataSize % sizeof(USHORT); + Count = NewDataSize >> 1; // NewDataSize / sizeof(USHORT); + NewDataSize = NewDataSize & 1; // NewDataSize % sizeof(USHORT); while (Count--) { *(PUSHORT)Address = IOReadW(CurrentPort); @@ -408,17 +405,14 @@ }
/* Read byte */ - Count = NewDataSize / sizeof(UCHAR); - NewDataSize = NewDataSize % sizeof(UCHAR); + Count = NewDataSize; // NewDataSize / sizeof(UCHAR); + // NewDataSize = NewDataSize % sizeof(UCHAR); while (Count--) { *(PUCHAR)Address = IOReadB(CurrentPort); CurrentPort += sizeof(UCHAR); Address += sizeof(UCHAR); } - - ASSERT(Count == 0); - ASSERT(NewDataSize == 0); } } } @@ -457,7 +451,7 @@ } else { - PBYTE Address = (PBYTE)Buffer; + PUCHAR Address = (PUCHAR)Buffer;
while (DataCount--) { @@ -466,8 +460,8 @@ UCHAR NewDataSize = DataSize;
/* Write dword */ - Count = NewDataSize / sizeof(ULONG); - NewDataSize = NewDataSize % sizeof(ULONG); + Count = NewDataSize >> 2; // NewDataSize / sizeof(ULONG); + NewDataSize = NewDataSize & 3; // NewDataSize % sizeof(ULONG); while (Count--) { IOWriteD(CurrentPort, *(PULONG)Address); @@ -476,8 +470,8 @@ }
/* Write word */ - Count = NewDataSize / sizeof(USHORT); - NewDataSize = NewDataSize % sizeof(USHORT); + Count = NewDataSize >> 1; // NewDataSize / sizeof(USHORT); + NewDataSize = NewDataSize & 1; // NewDataSize % sizeof(USHORT); while (Count--) { IOWriteW(CurrentPort, *(PUSHORT)Address); @@ -486,17 +480,14 @@ }
/* Write byte */ - Count = NewDataSize / sizeof(UCHAR); - NewDataSize = NewDataSize % sizeof(UCHAR); + Count = NewDataSize; // NewDataSize / sizeof(UCHAR); + // NewDataSize = NewDataSize % sizeof(UCHAR); while (Count--) { IOWriteB(CurrentPort, *(PUCHAR)Address); CurrentPort += sizeof(UCHAR); Address += sizeof(UCHAR); } - - ASSERT(Count == 0); - ASSERT(NewDataSize == 0); } } }