Author: arty Date: Thu Mar 29 04:43:44 2012 New Revision: 56267
URL: http://svn.reactos.org/svn/reactos?rev=56267&view=rev Log: [NTOSKRKNL]
Write a lot of general prose about the operation of the Mm parts of NewCC.
Dedicate to timo and chongo.
No code changes.
Modified: trunk/reactos/ntoskrnl/cache/section/data.c trunk/reactos/ntoskrnl/cache/section/fault.c trunk/reactos/ntoskrnl/cache/section/io.c trunk/reactos/ntoskrnl/cache/section/reqtools.c trunk/reactos/ntoskrnl/cache/section/sptab.c trunk/reactos/ntoskrnl/cache/section/swapout.c trunk/reactos/ntoskrnl/mm/rmap.c
Modified: trunk/reactos/ntoskrnl/cache/section/data.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/cache/section/data... ============================================================================== --- trunk/reactos/ntoskrnl/cache/section/data.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/cache/section/data.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -42,6 +42,36 @@ * Herve Poussineau */
+/* + +A note on this code: + +Unlike the previous section code, this code does not rely on an active map +for a page to exist in a data segment. Each mapping contains a large integer +offset to map at, and the segment always represents the entire section space +from zero to the maximum long long. This allows us to associate one single +page map with each file object, and to let each mapping view an offset into +the overall mapped file. Temporarily unmapping the file has no effect on the +section membership. + +This necessitates a change in the section page table implementation, which is +now an RtlGenericTable. This will be elaborated more in sptab.c. One upshot +of this change is that a mapping of a small file takes a bit more than 1/4 +of the size in nonpaged kernel space as it did previously. + +When we need other threads that may be competing for the same page fault to +wait, we have a mechanism separate from PageOps for dealing with that, which +was suggested by Travis Geiselbrecht after a conversation I had with Alex +Ionescu. That mechanism is the MM_WAIT_ENTRY, which is the all-ones SWAPENTRY. + +When we wish for other threads to know that we're waiting and will finish +handling a page fault, we place the swap entry MM_WAIT_ENTRY in the page table +at the fault address (this works on either the section page table or a process +address space), perform any blocking operations required, then replace the +entry with its final value. + +*/ + /* INCLUDES *****************************************************************/
#include <ntoskrnl.h> @@ -163,6 +193,17 @@ return STATUS_SUCCESS; }
+/* + +MiFlushMappedSection + +Called from cache code to cause dirty pages of a section +to be written back. This doesn't affect the mapping. + +BaseOffset is the base at which to start writing in file space. +FileSize is the length of the file as understood by the cache. + + */ NTSTATUS NTAPI _MiFlushMappedSection(PVOID BaseAddress, @@ -299,6 +340,13 @@ return Status; }
+/* + +This deletes a segment entirely including its page map. +It must have been unmapped in every address space. + + */ + VOID NTAPI MmFinalizeSegment(PMM_SECTION_SEGMENT Segment) @@ -346,7 +394,7 @@ ULONG AllocationAttributes, PFILE_OBJECT FileObject) /* - * Create a section backed by a data file + * Create a section backed by a data file. */ { PROS_SECTION_OBJECT Section; @@ -641,6 +689,13 @@ return STATUS_SUCCESS; }
+/* + +Completely remove the page at FileOffset in Segment. The page must not +be mapped. + +*/ + VOID NTAPI MiFreeSegmentPage(PMM_SECTION_SEGMENT Segment,
Modified: trunk/reactos/ntoskrnl/cache/section/fault.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/cache/section/faul... ============================================================================== --- trunk/reactos/ntoskrnl/cache/section/fault.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/cache/section/fault.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -43,6 +43,34 @@ * Herve Poussineau */
+/* + +I've generally organized fault handling code in newmm as handlers that run +under a single lock acquisition, check the state, and either take necessary +action atomically, or place a wait entry and return a continuation to the +caller. This lends itself to code that has a simple, structured form, +doesn't make assumptions about lock taking and breaking, and provides an +obvious, graphic separation between code that may block and code that isn't +allowed to. This file contains the non-blocking half. + +In order to request a blocking operation to happen outside locks, place a +function pointer in the provided MM_REQUIRED_RESOURCES struct and return +STATUS_MORE_PROCESSING_REQUIRED. The function indicated will receive the +provided struct and take action outside of any mm related locks and at +PASSIVE_LEVEL. The same fault handler will be called again after the +blocking operation succeeds. In this way, the fault handler can accumulate +state, but will freely work while competing with other threads. + +Fault handlers in this file should check for an MM_WAIT_ENTRY in a page +table they're using and return STATUS_SUCCESS + 1 if it's found. In that +case, the caller will wait on the wait entry event until the competing thread +is finished, and recall this handler in the current thread. + +Another thing to note here is that we require mappings to exactly mirror +rmaps, so each mapping should be immediately followed by an rmap addition. + +*/ + /* INCLUDES *****************************************************************/
#include <ntoskrnl.h> @@ -55,6 +83,22 @@
extern KEVENT MmWaitPageEvent; extern PMMWSL MmWorkingSetList; + +/* + +Multiple stage handling of a not-present fault in a data section. + +Required->State is used to accumulate flags that indicate the next action +the handler should take. + +State & 2 is currently used to indicate that the page acquired by a previous +callout is a global page to the section and should be placed in the section +page table. + +Note that the primitive tail recursion done here reaches the base case when +the page is present. + +*/
NTSTATUS NTAPI @@ -168,6 +212,10 @@ } else if (MM_IS_WAIT_PTE(Entry)) { + // Whenever MM_WAIT_ENTRY is required as a swap entry, we need to + // ask the fault handler to wait until we should continue. Rather + // than recopy this boilerplate code everywhere, we just ask them + // to wait. MmUnlockSectionSegment(Segment); return STATUS_SUCCESS + 1; } @@ -254,6 +302,18 @@ MiUnmapPageInHyperSpace(Process, TempAddress, Irql); return STATUS_SUCCESS; } + +/* + +This function is deceptively named, in that it does the actual work of handling +access faults on data sections. In the case of the code that's present here, +we don't allow cow sections, but we do need this to unset the initial +PAGE_READONLY condition of pages faulted into the cache so that we can add +a dirty bit in the section page table on the first modification. + +In the ultimate form of this code, CoW is reenabled. + +*/
NTSTATUS NTAPI @@ -344,6 +404,8 @@ else return STATUS_SUCCESS; // Nonwait swap entry ... handle elsewhere } + /* Call out to acquire a page to copy to. We'll be re-called when + * the page has been allocated. */ Required->Page[1] = MmGetPfnForProcess(Process, Address); Required->Consumer = MC_CACHE; Required->Amount = 1; @@ -402,6 +464,15 @@ KEVENT Wait; AcquireResource DoAcquisition; } WORK_QUEUE_WITH_CONTEXT, *PWORK_QUEUE_WITH_CONTEXT; + +/* + +This is the work item used to do blocking resource acquisition when a fault +handler returns STATUS_MORE_PROCESSING_REQUIRED. It's used to allow resource +acquisition to take place on a different stack, and outside of any locks used +by fault handling, making recursive fault handling possible when required. + +*/
VOID NTAPI @@ -414,6 +485,38 @@ DPRINT("Status %x\n", WorkItem->Status); KeSetEvent(&WorkItem->Wait, IO_NO_INCREMENT, FALSE); } + +/* + +This code separates the action of fault handling into an upper and lower +handler to allow the inner handler to optionally be called in a work item +if the stack is getting too deep. My experiments show that the third +recursive page fault taken at PASSIVE_LEVEL must be shunted away to a +worker thread. In the ultimate form of this code, the primary fault handler +makes this decision by using a thread-local counter to detect a too-deep +fault stack and call the inner fault handler in a worker thread if required. + +Note that faults are taken at passive level and have access to ordinary +driver entry points such as those that read and write files, and filesystems +should use paged structures whenever possible. This makes recursive faults +both a perfectly normal occurrence, and a worthwhile case to handle. + +The code below will repeatedly call MiCowSectionPage as long as it returns +either STATUS_SUCCESS + 1 or STATUS_MORE_PROCESSING_REQUIRED. In the more +processing required case, we call out to a blocking resource acquisition +function and then recall the fault handler with the shared state represented +by the MM_REQUIRED_RESOURCES struct. + +In the other case, we wait on the wait entry event and recall the handler. +Each time the wait entry event is signalled, one thread has removed an +MM_WAIT_ENTRY from a page table. + +In the ultimate form of this code, there is a single system wide fault handler +for each of access fault and not present and each memory area contains a +function pointer that indicates the active fault handler. Since the mm code +in reactos is currently fragmented, I didn't bring this change to trunk. + +*/
NTSTATUS NTAPI @@ -564,6 +667,17 @@ return Status; }
+/* + +This is the outer fault handler mentioned in the description of +MmpSectionAccessFaultInner. It increments a fault depth count in the current +thread. + +In the ultimate form of this code, the lower fault handler will optionally +use the count to keep the kernel stack from overflowing. + +*/ + NTSTATUS NTAPI MmAccessFaultCacheSection(KPROCESSOR_MODE Mode, @@ -612,6 +726,16 @@
return Status; } + +/* + +As above, this code separates the active part of fault handling from a carrier +that can use the thread's active fault count to determine whether a work item +is required. Also as above, this function repeatedly calls the active not +present fault handler until a clear success or failure is received, using a +return of STATUS_MORE_PROCESSING_REQUIRED or STATUS_SUCCESS + 1. + +*/
NTSTATUS NTAPI @@ -765,6 +889,14 @@ return Status; }
+/* + +Call the inner not present fault handler, keeping track of the fault count. +In the ultimate form of this code, optionally use a worker thread to handle +the fault in order to sidestep stack overflow in the multiple fault case. + +*/ + NTSTATUS NTAPI MmNotPresentFaultCacheSection(KPROCESSOR_MODE Mode,
Modified: trunk/reactos/ntoskrnl/cache/section/io.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/cache/section/io.c... ============================================================================== --- trunk/reactos/ntoskrnl/cache/section/io.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/cache/section/io.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -64,9 +64,13 @@ return IoGetRelatedDeviceObject(FileObject); }
-/* Note: - This completion function is really required. Paging io completion does almost - nothing, including freeing the mdls. */ +/* + +Note: +This completion function is really required. Paging io completion does almost +nothing, including freeing the mdls. + +*/ NTSTATUS NTAPI MiSimpleReadComplete(PDEVICE_OBJECT DeviceObject, @@ -94,6 +98,15 @@
return STATUS_SUCCESS; } + +/* + +MiSimpleRead is a convenience function that provides either paging or non +paging reads. The caching and mm systems use this in paging mode, where +a completion function is required as above. The Paging BOOLEAN determines +whether the read is issued as a paging read or as an ordinary buffered read. + +*/
NTSTATUS NTAPI @@ -177,6 +190,13 @@ return Status; }
+/* + +Convenience function for writing from kernel space. This issues a paging +write in all cases. + +*/ + NTSTATUS NTAPI _MiSimpleWrite(PFILE_OBJECT FileObject, @@ -259,6 +279,15 @@ extern KEVENT MpwThreadEvent; FAST_MUTEX MiWriteMutex;
+/* + +Function which uses MiSimpleWrite to write back a single page to a file. +The page in question does not need to be mapped. This function could be +made a bit more efficient by avoiding the copy and making a system space +mdl. + +*/ + NTSTATUS NTAPI _MiWriteBackPage(PFILE_OBJECT FileObject,
Modified: trunk/reactos/ntoskrnl/cache/section/reqtools.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/cache/section/reqt... ============================================================================== --- trunk/reactos/ntoskrnl/cache/section/reqtools.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/cache/section/reqtools.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -42,6 +42,13 @@ * Herve Poussineau */
+/* + This file contains functions used by fault.c to do blocking resource + acquisition. To call one of these functions, fill out your + MM_REQUIRED_RESOURCES with a pointer to the desired function and configure + the other members as below. + */ + /* INCLUDES *****************************************************************/
#include <ntoskrnl.h> @@ -55,6 +62,22 @@ NTAPI MmBuildMdlFromPages(PMDL Mdl, PPFN_NUMBER Pages);
+/* + +Blocking function to acquire zeroed pages from the balancer. + +Upon entry: + +Required->Amount: Number of pages to acquire +Required->Consumer: consumer to charge the page to + +Upon return: + +Required->Page[0..Amount]: Allocated pages. + +The function fails unless all requested pages can be allocated. + + */ NTSTATUS NTAPI MiGetOnePage(PMMSUPPORT AddressSpace, @@ -85,6 +108,26 @@ return Status; }
+/* + +Blocking function to read (part of) a page from a file. + +Upon entry: + +Required->Context: a FILE_OBJECT to read +Required->Consumer: consumer to charge the page to +Required->FileOffset: Offset to read at +Required->Amount: Number of bytes to read (0 -> 4096) + +Upon return: + +Required->Page[Required->Offset]: The allocated and read in page + +The indicated page is filled to Required->Amount with file data and zeroed +afterward. + + */ + NTSTATUS NTAPI MiReadFilePage(PMMSUPPORT AddressSpace, @@ -158,6 +201,21 @@ return STATUS_SUCCESS; }
+/* + +Blocking function to read a swap page into a memory page. + +Upon entry: + +Required->Consumer: consumer to charge the page to +Required->SwapEntry: swap entry to use + +Upon return: + +Required->Page[Required->Offset]: Populated page + +*/ + NTSTATUS NTAPI MiSwapInPage(PMMSUPPORT AddressSpace, @@ -192,6 +250,22 @@
return Status; } + +/* + +A way to write a page without a lock acquired using the same blocking mechanism +as resource acquisition. + +Upon entry: + +Required->Page[Required->Offset]: Page to write +Required->Context: FILE_OBJECT to write to +Required->FileOffset: offset to write at + +This always does a paging write with whole page size. Note that paging IO +doesn't change the valid data length of a file. + +*/
NTSTATUS NTAPI
Modified: trunk/reactos/ntoskrnl/cache/section/sptab.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/cache/section/spta... ============================================================================== --- trunk/reactos/ntoskrnl/cache/section/sptab.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/cache/section/sptab.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -22,6 +22,34 @@ * * PROGRAMMERS: arty */ + +/* + +This file implements the section page table. It relies on rtl generic table +functionality to provide access to 256-page chunks. Calls to +MiSetPageEntrySectionSegment and MiGetPageEntrySectionSegment must be +synchronized by holding the segment lock. + +Each page table entry is a ULONG as in x86. + +Bit 1 is used as a swap entry indication as in the main page table. +Bit 2 is used as a dirty indication. A dirty page will eventually be written +back to the file. +Bits 3-11 are used as a map count in the legacy mm code. Note that zero is +illegal, as the legacy code does not take advantage of segment rmaps. +Therefore, every segment page is mapped in at least one address space, and +MmUnsharePageEntry is quite complicated. In addition, the page may also be +owned by the legacy cache manager, giving an implied additional reference. +Upper bits are a PFN_NUMBER. + +These functions, in addition to maintaining the segment page table also +automatically maintain the segment rmap by calling MmSetSectionAssociation +and MmDeleteSectionAssociation. Segment rmaps are discussed in rmap.c. The +upshot is that it is impossible to have a page properly registered in a segment +page table and not also found in a segment rmap that can be found from the +paging machinery. + +*/
/* INCLUDES *****************************************************************/
@@ -232,6 +260,17 @@ return Result; }
+/* + +Destroy the rtl generic table that serves as the section's page table. Call +the FreePage function for each non-zero entry in the section page table as +we go. Note that the page table is still technically valid until after all +pages are destroyed, as we don't finally destroy the table until we've freed +each slice. There is no order guarantee for deletion of individual elements +although it's in-order as written now. + +*/ + VOID NTAPI MmFreePageTablesSectionSegment(PMM_SECTION_SEGMENT Segment, @@ -271,6 +310,21 @@ DPRINT("Done\n"); }
+/* + +Retrieves the MM_SECTION_SEGMENT and fills in the LARGE_INTEGER Offset given +by the caller that corresponds to the page specified. This uses +MmGetSegmentRmap to find the rmap belonging to the segment itself, and uses +the result as a pointer to a 256-entry page table structure. The rmap also +includes 8 bits of offset information indicating one of 256 page entries that +the rmap corresponds to. This information together gives us an exact offset +into the file, as well as the MM_SECTION_SEGMENT pointer stored in the page +table slice. + +NULL is returned if there is no segment rmap for the page. + +*/ + PMM_SECTION_SEGMENT NTAPI MmGetSectionAssociation(PFN_NUMBER Page,
Modified: trunk/reactos/ntoskrnl/cache/section/swapout.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/cache/section/swap... ============================================================================== --- trunk/reactos/ntoskrnl/cache/section/swapout.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/cache/section/swapout.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -43,6 +43,21 @@ * Herve Poussineau */
+/* + +This file implements page out infrastructure for cache type sections. This +is implemented a little differently from the legacy mm because mapping in an +address space and membership in a segment are considered separate. + +The general strategy here is to try to remove all mappings as gently as +possible, then to remove the page entry from the section itself as a final +step. If at any time during the page out operation, the page is mapped in +a new address space by a competing thread, the operation will abort before +the segment page is finally removed, and the page will be naturally faulted +back into any address spaces required in the normal way. + +*/ + /* INCLUDES *****************************************************************/
#include <ntoskrnl.h> @@ -58,6 +73,15 @@
FAST_MUTEX MiGlobalPageOperation;
+/* + +MmWithdrawSectionPage removes a page entry from the section segment, replacing +it with a wait entry. The caller must replace the wait entry with a 0, when +any required writing is done. The wait entry must remain until the page is +written to protect against cases where a fault brings a stale copy of the page +back before writing is complete. + +*/ PFN_NUMBER NTAPI MmWithdrawSectionPage(PMM_SECTION_SEGMENT Segment, @@ -119,6 +143,22 @@ return 0; } } + +/* + +This function determines whether the segment holds the very last reference to +the page being considered and if so, writes it back or discards it as +appropriate. One small niggle here is that we might be holding the last +reference to the section segment associated with this page. That happens +when the segment is destroyed at the same time that an active swap operation +is occurring, and all maps were already withdrawn. In that case, it's our +responsibility to finalize the segment. + +Note that in the current code, WriteZero is always TRUE because the section +always backs a file. In the ultimate form of this code, it also writes back +pages without necessarily evicting them. In reactos' trunk, this is vestigial. + +*/
NTSTATUS NTAPI @@ -219,6 +259,20 @@ DPRINT("Status %x\n", Status); return Status; } + +/* + +The slightly misnamed MmPageOutCacheSection removes a page from an address +space in the manner of fault handlers found in fault.c. In the ultimate form +of the code, this is one of the function pointers stored in a memory area +to control how pages in that memory area are managed. + +Also misleading is the call to MmReleasePageMemoryConsumer, which releases +the reference held by this address space only. After all address spaces +have had MmPageOutCacheSection succeed on them for the indicated page, +then paging out of a cache page can continue. + +*/
NTSTATUS NTAPI @@ -257,12 +311,32 @@ MmDeleteVirtualMapping(Process, Address, FALSE, Dirty, &OurPage); ASSERT(OurPage == Required->Page[0]);
+ /* Note: this releases the reference held by this address space only. */ MmReleasePageMemoryConsumer(MC_CACHE, Required->Page[0]);
MmUnlockSectionSegment(Segment); MiSetPageEvent(Process, Address); return STATUS_SUCCESS; } + +/* + +This function is called by rmap when spare pages are needed by the balancer. +It attempts first to release the page from every address space in which it +appears, and, after a final check that no competing thread has mapped the +page again, uses MmFinalizeSectionPageOut to completely evict the page. If +that's successful, then a suitable non-page map will be left in the segment +page table, otherwise, the original page is replaced in the section page +map. Failure may result from a variety of conditions, but always leaves +the page mapped. + +This code is like the other fault handlers, in that MmPageOutCacheSection has +the option of returning either STATUS_SUCCESS + 1 to wait for a wait entry +to disappear or to use the blocking callout facility by returning +STATUS_MORE_PROCESSING_REQUIRED and placing a pointer to a function from +reqtools.c in the MM_REQUIRED_RESOURCES struct. + +*/
NTSTATUS NTAPI
Modified: trunk/reactos/ntoskrnl/mm/rmap.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/rmap.c?rev=5626... ============================================================================== --- trunk/reactos/ntoskrnl/mm/rmap.c [iso-8859-1] (original) +++ trunk/reactos/ntoskrnl/mm/rmap.c [iso-8859-1] Thu Mar 29 04:43:44 2012 @@ -418,6 +418,20 @@ KeBugCheck(MEMORY_MANAGEMENT); }
+/* + +Return the process pointer given in a previous call to MmInsertRmap that was +made with a process and address pointer that conform to the segment rmap +schema. In short, this requires the address part to be 0xffffff00 + n +where n is between 0 and 255. When such an rmap exists, it specifies a +segment rmap in which the process part is a pointer to a slice of a section +page table, and the low 8 bits of the address represent a page index in the +page table slice. Together, this information is used by +MmGetSectionAssociation to determine which page entry points to this page in +the segment page table. + +*/ + PVOID NTAPI MmGetSegmentRmap(PFN_NUMBER Page, PULONG RawOffset) @@ -445,6 +459,12 @@ return NULL; }
+/* + +Remove the section rmap associated with the indicated page, if it exists. + +*/ + VOID NTAPI MmDeleteSectionAssociation(PFN_NUMBER Page)