Author: sir_richard Date: Sun Mar 4 17:56:00 2012 New Revision: 56000
URL: http://svn.reactos.org/svn/reactos?rev=56000&view=rev
Log:
[MEGAPERF]: This build introduces the following performance boosts:
- Using a dead stack lookaside list for up to 5 dead kernel thread stacks. 1500% improvement when rapidly destroying/creating threads, such as during second stage setup and many winetests.
- Using a free pool page lookaside list for up to 12 free non-paged or paged pool pages. 800% improvement when allocating big pages from the pool, as well as during pool expansion.
- Using bucketized per-processor and global lookaside lists (reached through the KPRCB) for block sizes between 1 and 32 pool blocks (units of POOL_BLOCK_SIZE, pool header included), not 1 to 32 bytes. 1000% improvement when rapidly allocating/freeing small pool allocations, and 8x reduction in pool fragmentation.
Modified: trunk/reactos/ntoskrnl/io/iomgr/device.c trunk/reactos/ntoskrnl/mm/ARM3/expool.c trunk/reactos/ntoskrnl/mm/ARM3/miarm.h trunk/reactos/ntoskrnl/mm/ARM3/mminit.c trunk/reactos/ntoskrnl/mm/ARM3/pool.c trunk/reactos/ntoskrnl/mm/ARM3/procsup.c
Modified: trunk/reactos/ntoskrnl/io/iomgr/device.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/io/iomgr/device.c?... ============================================================================== --- trunk/reactos/ntoskrnl/io/iomgr/device.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/io/iomgr/device.c [iso-8859-1] Sun Mar 4 17:56:00 2012 @@ -367,7 +367,7 @@ /* We can't unload unless there's an unload handler */ if (!DriverObject->DriverUnload) { - DPRINT1("No DriverUnload function! '%wZ' will not be unloaded!\n", &DriverObject->DriverName); + DPRINT("No DriverUnload function! '%wZ' will not be unloaded!\n", &DriverObject->DriverName); return; }
Modified: trunk/reactos/ntoskrnl/mm/ARM3/expool.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/expool.c?r... ============================================================================== --- trunk/reactos/ntoskrnl/mm/ARM3/expool.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/mm/ARM3/expool.c [iso-8859-1] Sun Mar 4 17:56:00 2012 @@ -1346,7 +1346,7 @@ { ULONG i; PPOOL_DESCRIPTOR PoolDesc; - + // // Assume all failures // @@ -1414,6 +1414,8 @@ KIRQL OldIrql; USHORT BlockSize, i; ULONG OriginalType; + PKPRCB Prcb = KeGetCurrentPrcb(); + PGENERAL_LOOKASIDE LookasideList;
// // Some sanity checks @@ -1560,6 +1562,57 @@ // i = (USHORT)((NumberOfBytes + sizeof(POOL_HEADER) + (POOL_BLOCK_SIZE - 1)) / POOL_BLOCK_SIZE); + + // + // Handle lookaside list optimization for both paged and nonpaged pool + // + if (i <= MAXIMUM_PROCESSORS) + { + // + // Try popping it from the per-CPU lookaside list + // + LookasideList = (PoolType == PagedPool) ? + Prcb->PPPagedLookasideList[i - 1].P : + Prcb->PPNPagedLookasideList[i - 1].P; + LookasideList->TotalAllocates++; + Entry = (PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead); + if (!Entry) + { + // + // We failed, try popping it from the global list + // + LookasideList = (PoolType == PagedPool) ? + Prcb->PPPagedLookasideList[i - 1].L : + Prcb->PPNPagedLookasideList[i - 1].L; + LookasideList->TotalAllocates++; + Entry = (PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead); + } + + // + // If we were able to pop it, update the accounting and return the block + // + if (Entry) + { + LookasideList->AllocateHits++; + + // + // Get the real entry, write down its pool type, and track it + // + Entry--; + Entry->PoolType = PoolType + 1; + ExpInsertPoolTracker(Tag, + Entry->BlockSize * POOL_BLOCK_SIZE, + OriginalType); + + // + // Return the pool allocation + // + Entry->PoolTag = Tag; + (POOL_FREE_BLOCK(Entry))->Flink = NULL; + (POOL_FREE_BLOCK(Entry))->Blink = NULL; + return POOL_FREE_BLOCK(Entry); + } + }
// // Loop in the free lists looking for a block if this size. Start with the @@ -1902,6 +1955,8 @@ ULONG Tag; BOOLEAN Combined = FALSE; PFN_NUMBER PageCount, RealPageCount; + PKPRCB Prcb = KeGetCurrentPrcb(); + PGENERAL_LOOKASIDE LookasideList;
// // Check if any of the debug flags are enabled @@ -2073,6 +2128,40 @@ Entry->PoolType - 1);
// + // Is this allocation small enough to have come from a lookaside list? + // + if (BlockSize <= MAXIMUM_PROCESSORS) + { + // + // Try pushing it into the per-CPU lookaside list + // + LookasideList = (PoolType == PagedPool) ? + Prcb->PPPagedLookasideList[BlockSize - 1].P : + Prcb->PPNPagedLookasideList[BlockSize - 1].P; + LookasideList->TotalFrees++; + if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth) + { + LookasideList->FreeHits++; + InterlockedPushEntrySList(&LookasideList->ListHead, P); + return; + } + + // + // We failed, try to push it into the global lookaside list + // + LookasideList = (PoolType == PagedPool) ? + Prcb->PPPagedLookasideList[BlockSize - 1].L : + Prcb->PPNPagedLookasideList[BlockSize - 1].L; + LookasideList->TotalFrees++; + if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth) + { + LookasideList->FreeHits++; + InterlockedPushEntrySList(&LookasideList->ListHead, P); + return; + } + } + + // // Get the pointer to the next entry // NextEntry = POOL_BLOCK(Entry, BlockSize);
Modified: trunk/reactos/ntoskrnl/mm/ARM3/miarm.h URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/miarm.h?re... ============================================================================== --- trunk/reactos/ntoskrnl/mm/ARM3/miarm.h [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/mm/ARM3/miarm.h [iso-8859-1] Sun Mar 4 17:56:00 2012 @@ -598,6 +598,8 @@ extern PVOID MiSessionPoolEnd; // 0xBE000000 extern PVOID MiSessionPoolStart; // 0xBD000000 extern PVOID MiSessionViewStart; // 0xBE000000 +extern ULONG MmMaximumDeadKernelStacks; +extern SLIST_HEADER MmDeadStackSListHead;
BOOLEAN FORCEINLINE
Modified: trunk/reactos/ntoskrnl/mm/ARM3/mminit.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/mminit.c?r... ============================================================================== --- trunk/reactos/ntoskrnl/mm/ARM3/mminit.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/mm/ARM3/mminit.c [iso-8859-1] Sun Mar 4 17:56:00 2012 @@ -2076,6 +2076,9 @@ KeInitializeEvent(&MmZeroingPageEvent, SynchronizationEvent, FALSE); MmZeroingPageThreadActive = FALSE;
+ /* Initialize the dead stack S-LIST */ + InitializeSListHead(&MmDeadStackSListHead); + // // Check if this is a machine with less than 19MB of RAM // @@ -2268,18 +2271,21 @@ { /* Set small system */ MmSystemSize = MmSmallSystem; + MmMaximumDeadKernelStacks = 0; } else if (MmNumberOfPhysicalPages <= ((19 * _1MB) / PAGE_SIZE)) { /* Set small system and add 100 pages for the cache */ MmSystemSize = MmSmallSystem; MmSystemCacheWsMinimum += 100; + MmMaximumDeadKernelStacks = 2; } else { /* Set medium system and add 400 pages for the cache */ MmSystemSize = MmMediumSystem; MmSystemCacheWsMinimum += 400; + MmMaximumDeadKernelStacks = 5; }
/* Check for less than 24MB */
Modified: trunk/reactos/ntoskrnl/mm/ARM3/pool.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/pool.c?rev... ============================================================================== --- trunk/reactos/ntoskrnl/mm/ARM3/pool.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/mm/ARM3/pool.c [iso-8859-1] Sun Mar 4 17:56:00 2012 @@ -27,6 +27,10 @@ ULONG MmSpecialPoolTag; ULONG MmConsumedPoolPercentage; BOOLEAN MmProtectFreedNonPagedPool; +SLIST_HEADER MiNonPagedPoolSListHead; +ULONG MiNonPagedPoolSListMaximum = 4; +SLIST_HEADER MiPagedPoolSListHead; +ULONG MiPagedPoolSListMaximum = 8;
/* PRIVATE FUNCTIONS **********************************************************/
@@ -276,6 +280,34 @@ PMMFREE_POOL_ENTRY FreeEntry, FirstEntry; PMMPTE PointerPte; PAGED_CODE(); + + // + // Initialize the pool S-LISTs as well as their maximum count. In general, + // we'll allow 8 times the default on a 2GB system, and two times the default + // on a 1GB system. + // + InitializeSListHead(&MiPagedPoolSListHead); + InitializeSListHead(&MiNonPagedPoolSListHead); + if (MmNumberOfPhysicalPages >= ((2 * _1GB) /PAGE_SIZE)) + { + MiNonPagedPoolSListMaximum *= 8; + MiPagedPoolSListMaximum *= 8; + } + else if (MmNumberOfPhysicalPages >= (_1GB /PAGE_SIZE)) + { + MiNonPagedPoolSListMaximum *= 2; + MiPagedPoolSListMaximum *= 2; + } + + // + // However if debugging options for the pool are enabled, turn off the S-LIST + // to reduce the risk of messing things up even more + // + if (MmProtectFreedNonPagedPool) + { + MiNonPagedPoolSListMaximum = 0; + MiPagedPoolSListMaximum = 0; + }
// // We keep 4 lists of free pages (4 lists help avoid contention) @@ -411,6 +443,15 @@ if ((PoolType & BASE_POOL_TYPE_MASK) == PagedPool) { // + // If only one page is being requested, try to grab it from the S-LIST + // + if ((SizeInPages == 1) && (ExQueryDepthSList(&MiPagedPoolSListHead))) + { + BaseVa = InterlockedPopEntrySList(&MiPagedPoolSListHead); + if (BaseVa) return BaseVa; + } + + // // Lock the paged pool mutex // KeAcquireGuardedMutex(&MmPagedPoolMutex); @@ -611,6 +652,15 @@ }
// + // If only one page is being requested, try to grab it from the S-LIST + // + if ((SizeInPages == 1) && (ExQueryDepthSList(&MiNonPagedPoolSListHead))) + { + BaseVa = InterlockedPopEntrySList(&MiNonPagedPoolSListHead); + if (BaseVa) return BaseVa; + } + + // // Allocations of less than 4 pages go into their individual buckets // i = SizeInPages - 1; @@ -861,9 +911,16 @@ while (!RtlTestBit(MmPagedPoolInfo.EndOfPagedPoolBitmap, End)) End++;
// - // Now calculate the total number of pages this allocation spans + // Now calculate the total number of pages this allocation spans. If it's + // only one page, add it to the S-LIST instead of freeing it // NumberOfPages = End - i + 1; + if ((NumberOfPages == 1) && + (ExQueryDepthSList(&MiPagedPoolSListHead) < MiPagedPoolSListMaximum)) + { + InterlockedPushEntrySList(&MiPagedPoolSListHead, StartingVa); + return 1; + }
/* Delete the actual pages */ PointerPte = MmPagedPoolInfo.FirstPteForPagedPool + i; @@ -898,10 +955,18 @@ }
// - // Get the first PTE and its corresponding PFN entry + // Get the first PTE and its corresponding PFN entry. If this is also the + // last PTE, meaning that this allocation was only for one page, push it into + // the S-LIST instead of freeing it // StartPte = PointerPte = MiAddressToPte(StartingVa); StartPfn = Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber); + if ((Pfn1->u3.e1.EndOfAllocation == 1) && + (ExQueryDepthSList(&MiNonPagedPoolSListHead) < MiNonPagedPoolSListMaximum)) + { + InterlockedPushEntrySList(&MiNonPagedPoolSListHead, StartingVa); + return 1; + }
// // Loop until we find the last PTE
Modified: trunk/reactos/ntoskrnl/mm/ARM3/procsup.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/procsup.c?... ============================================================================== --- trunk/reactos/ntoskrnl/mm/ARM3/procsup.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/mm/ARM3/procsup.c [iso-8859-1] Sun Mar 4 17:56:00 2012 @@ -19,6 +19,8 @@
ULONG MmProcessColorSeed = 0x12345678; PMMWSL MmWorkingSetList; +ULONG MmMaximumDeadKernelStacks = 5; +SLIST_HEADER MmDeadStackSListHead;
/* PRIVATE FUNCTIONS **********************************************************/
@@ -235,6 +237,19 @@ PointerPte--;
// + // If this is a small stack, just push the stack onto the dead stack S-LIST + // + if (!GuiStack) + { + if (ExQueryDepthSList(&MmDeadStackSListHead) < MmMaximumDeadKernelStacks) + { + Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber); + InterlockedPushEntrySList(&MmDeadStackSListHead, &Pfn1->u1.NextStackPfn); + return; + } + } + + // // Calculate pages used // StackPages = BYTES_TO_PAGES(GuiStack ? @@ -303,6 +318,7 @@ KIRQL OldIrql; PFN_NUMBER PageFrameIndex; ULONG i; + PMMPFN Pfn1;
// // Calculate pages needed @@ -318,6 +334,21 @@ } else { + // + // If the dead stack S-LIST has a stack on it, use it instead of allocating + // new system PTEs for this stack + // + if (ExQueryDepthSList(&MmDeadStackSListHead)) + { + Pfn1 = (PMMPFN)InterlockedPopEntrySList(&MmDeadStackSListHead); + if (Pfn1) + { + PointerPte = Pfn1->PteAddress; + BaseAddress = MiPteToAddress(++PointerPte); + return BaseAddress; + } + } + // // We'll allocate 12K and that's it //