30 Matching Annotations
  1. Mar 2025
    1. int migrate_pages(struct list_head *from, new_folio_t get_new_folio, free_folio_t put_new_folio, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded) { int rc, rc_gather; int nr_pages; struct folio *folio, *folio2; LIST_HEAD(folios); LIST_HEAD(ret_folios); LIST_HEAD(split_folios); struct migrate_pages_stats stats; trace_mm_migrate_pages_start(mode, reason); memset(&stats, 0, sizeof(stats)); rc_gather = migrate_hugetlbs(from, get_new_folio, put_new_folio, private, mode, reason, &stats, &ret_folios); if (rc_gather < 0) goto out; again: nr_pages = 0; list_for_each_entry_safe(folio, folio2, from, lru) { /* Retried hugetlb folios will be kept in list */ if (folio_test_hugetlb(folio)) { list_move_tail(&folio->lru, &ret_folios); continue; } nr_pages += folio_nr_pages(folio); if (nr_pages >= NR_MAX_BATCHED_MIGRATION) break; } if (nr_pages >= NR_MAX_BATCHED_MIGRATION) list_cut_before(&folios, from, &folio2->lru); else list_splice_init(from, &folios); if (mode == MIGRATE_ASYNC) rc = migrate_pages_batch(&folios, get_new_folio, put_new_folio, private, mode, reason, &ret_folios, &split_folios, &stats, NR_MAX_MIGRATE_PAGES_RETRY); else rc = migrate_pages_sync(&folios, get_new_folio, put_new_folio, private, mode, reason, &ret_folios, &split_folios, &stats); list_splice_tail_init(&folios, &ret_folios); if (rc < 0) { rc_gather = rc; list_splice_tail(&split_folios, &ret_folios); goto out; } if (!list_empty(&split_folios)) { /* * Failure isn't counted since all split folios of a large folio * is counted as 1 failure already. And, we only try to migrate * with minimal effort, force MIGRATE_ASYNC mode and retry once. */ migrate_pages_batch(&split_folios, get_new_folio, put_new_folio, private, MIGRATE_ASYNC, reason, &ret_folios, NULL, &stats, 1); list_splice_tail_init(&split_folios, &ret_folios); } rc_gather += rc; if (!list_empty(from)) goto again; out: /* * Put the permanent failure folio back to migration list, they * will be put back to the right list by the caller. */ list_splice(&ret_folios, from); /* * Return 0 in case all split folios of fail-to-migrate large folios * are migrated successfully. */ if (list_empty(from)) rc_gather = 0; count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded); count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages); count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded); count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed); count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split); trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages, stats.nr_thp_succeeded, stats.nr_thp_failed, stats.nr_thp_split, mode, reason); if (ret_succeeded) *ret_succeeded = stats.nr_succeeded; return rc_gather; }

      Probably the main entry point for migrating the folios on a list to newly allocated folios. No indication of policy here, except through the macros (NR_MAX_BATCHED_MIGRATION, NR_MAX_MIGRATE_PAGES_RETRY). Also worth looking into how the list_head structures (folios, ret_folios, split_folios) are used for batching, retries, and handing failures back to the caller. Overall mostly mechanism rather than an interesting function, despite the space it gets in the Linux kernel documentation (as opposed to the comments in here).
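
      A minimal sketch of the batching idiom used here (kernel-context code; carve_batch() is a made-up name, and NR_MAX_BATCHED_MIGRATION is local to mm/migrate.c, so this is illustrative only):

        #include <linux/list.h>
        #include <linux/mm.h>

        /* Sketch: carve a bounded batch off the head of `from` onto `batch`,
         * using list_cut_before() exactly as migrate_pages() does above.
         * (The real code also filters hugetlb folios out of the batch first.) */
        static void carve_batch(struct list_head *from, struct list_head *batch)
        {
                struct folio *folio, *folio2;
                int nr_pages = 0;

                list_for_each_entry_safe(folio, folio2, from, lru) {
                        nr_pages += folio_nr_pages(folio);
                        if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
                                break;
                }
                if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
                        /* move everything before folio2, i.e. up to and including folio */
                        list_cut_before(batch, from, &folio2->lru);
                else
                        /* the whole remaining list fits in one batch */
                        list_splice_init(from, batch);
        }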

  2. Nov 2024
    1. #ifdef CONFIG_ARCH_HAS_HUGEPD static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { unsigned long __boundary = (addr + sz) & ~(sz-1); return (__boundary - 1 < end - 1) ? __boundary : end; } static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long pte_end; struct page *page; struct folio *folio; pte_t pte; int refs; pte_end = (addr + sz) & ~(sz-1); if (pte_end < end) end = pte_end; pte = huge_ptep_get(ptep); if (!pte_access_permitted(pte, flags & FOLL_WRITE)) return 0; /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { pte_t *ptep; unsigned long sz = 1UL << hugepd_shift(hugepd); unsigned long next; ptep = hugepte_offset(hugepd, addr, pdshift); do { next = hugepte_addr_end(addr, end, sz); if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr)) return 0; } while (ptep++, addr = next, addr != end); return 1; } #else static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, unsigned long end, unsigned int flags, struct page **pages, int *nr) { return 0; } #endif /* CONFIG_ARCH_HAS_HUGEPD */ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pmd(orig, pmdp, addr, end, flags, pages, nr); } page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { struct page *page; struct folio *folio; int refs; if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; return __gup_device_huge_pud(orig, pudp, addr, end, flags, pages, nr); } page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { gup_put_folio(folio, refs, flags); return 0; } if 
(!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { int refs; struct page *page; struct folio *folio; if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; BUILD_BUG_ON(pgd_devmap(orig)); page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); folio = try_grab_folio(page, refs, flags); if (!folio) return 0; if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { gup_put_folio(folio, refs, flags); return 0; } if (!pgd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { gup_put_folio(folio, refs, flags); return 0; } if (!folio_fast_pin_allowed(folio, flags)) { gup_put_folio(folio, refs, flags); return 0; } *nr += refs; folio_set_referenced(folio); return 1; } static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; pmdp = pmd_offset_lockless(pudp, pud, addr); do { pmd_t pmd = pmdp_get_lockless(pmdp); next = pmd_addr_end(addr, end); if (!pmd_present(pmd)) return 0; if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))) { /* See gup_pte_range() */ if (pmd_protnone(pmd)) return 0; if (!gup_huge_pmd(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) { /* * architecture have different format for hugetlbfs * pmd format and THP pmd format */ if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr, PMD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); return 1; } static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; pudp = pud_offset_lockless(p4dp, p4d, addr); do { pud_t pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) { if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, PUD_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; p4d_t *p4dp; p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { p4d_t p4d = READ_ONCE(*p4dp); next = p4d_addr_end(addr, end); if (p4d_none(p4d)) return 0; BUILD_BUG_ON(p4d_huge(p4d)); if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) return 0; } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); return 1; } static void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pgd_t *pgdp; pgdp = pgd_offset(current->mm, addr); do { pgd_t pgd = 
READ_ONCE(*pgdp); next = pgd_addr_end(addr, end); if (pgd_none(pgd)) return; if (unlikely(pgd_huge(pgd))) { if (!gup_huge_pgd(pgd, pgdp, addr, next, flags, pages, nr)) return; } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) return; } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) return; } while (pgdp++, addr = next, addr != end); } #else static inline void gup_pgd_range(unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { }

      Policy-use functions: these gup_huge_* walkers call into the huge-PTE policy code annotated above (not right above; you'll probably have to scroll to find it).
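
      The same back-out pattern repeats in every gup_huge_* helper in this excerpt; condensed below with comments (a fragment only, not standalone code, reusing the names from gup_hugepte() above):

        /* Condensed from gup_hugepte() above: grab references speculatively,
         * then re-read the entry and undo everything if it changed while we
         * were holding no lock. */
        refs = record_subpages(page, addr, end, pages + *nr);

        folio = try_grab_folio(page, refs, flags);      /* speculative grab */
        if (!folio)
                return 0;

        if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
                gup_put_folio(folio, refs, flags);      /* entry changed: back out */
                return 0;                               /* caller falls back to slow GUP */
        }
        /* ...same back-out for folio_fast_pin_allowed() and gup_must_unshare()... */

        *nr += refs;                    /* success: report the pages we pinned */
        folio_set_referenced(folio);
        return 1;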

    2. static int internal_get_user_pages_fast(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { unsigned long len, end; unsigned long nr_pinned; int locked = 0; int ret; if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | FOLL_FAST_ONLY | FOLL_NOFAULT | FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT))) return -EINVAL; if (gup_flags & FOLL_PIN) mm_set_has_pinned_flag(&current->mm->flags); if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(&current->mm->mmap_lock); start = untagged_addr(start) & PAGE_MASK; len = nr_pages << PAGE_SHIFT; if (check_add_overflow(start, len, &end)) return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT; nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages); if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) return nr_pinned; /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; ret = __gup_longterm_locked(current->mm, start, nr_pages - nr_pinned, pages, &locked, gup_flags | FOLL_TOUCH | FOLL_UNLOCKABLE); if (ret < 0) { /* * The caller has to unpin the pages we already pinned so * returning -errno is not an option */ if (nr_pinned) return nr_pinned; return ret; } return ret + nr_pinned; } /** * get_user_pages_fast_only() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * the regular GUP. * * If the architecture does not support this function, simply return with no * pages pinned. * * Careful, careful! COW breaking can go either way, so a non-write * access can get ambiguous page results. If you call this function without * 'write' set, you'd better be sure that you're ok with that ambiguity. */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * Internally (within mm/gup.c), gup fast variants must set FOLL_GET, * because gup fast is always a "pin with a +1 page refcount" request. * * FOLL_FAST_ONLY is required in order to match the API description of * this routine: no fall back to regular ("slow") GUP. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET | FOLL_FAST_ONLY)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast_only); /** * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Attempt to pin user pages in memory without taking mm->mmap_lock. * If not successful, it will fall back to taking the lock and * calling get_user_pages(). * * Returns number of pages pinned. This may be fewer than the number requested. * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns * -errno. */ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { /* * The caller may or may not have explicitly set FOLL_GET; either way is * OK. 
However, internally (within mm/gup.c), gup fast variants must set * FOLL_GET, because gup fast is always a "pin with a +1 page refcount" * request. */ if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast); /** * pin_user_pages_fast() - pin user pages in memory without taking locks * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See * get_user_pages_fast() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for further details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page() will not remove pins from it. */ int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return -EINVAL; return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); } EXPORT_SYMBOL_GPL(pin_user_pages_fast); /** * pin_user_pages_remote() - pin pages of a remote process * * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @locked: pointer to lock flag indicating whether lock is held and * subsequently whether VM_FAULT_RETRY functionality can be * utilised. Lock must initially be held. * * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See * get_user_pages_remote() for documentation on the function arguments, because * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { int local_locked = 1; if (!is_valid_gup_args(pages, locked, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE)) return 0; return __gup_longterm_locked(mm, start, nr_pages, pages, locked ? locked : &local_locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages_remote); /** * pin_user_pages() - pin user pages in memory for use by other devices * * @start: starting user address * @nr_pages: number of pages from start to pin * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and * FOLL_PIN is set. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. 
*/ long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { int locked = 1; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); } EXPORT_SYMBOL(pin_user_pages); /* * pin_user_pages_unlocked() is the FOLL_PIN variant of * get_user_pages_unlocked(). Behavior is the same, except that this one sets * FOLL_PIN and rejects FOLL_GET. * * Note that if a zero_page is amongst the returned pages, it will not have * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { int locked = 0; if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN | FOLL_TOUCH | FOLL_UNLOCKABLE)) return 0; return __gup_longterm_locked(current->mm, start, nr_pages, pages, &locked, gup_flags); }

      The fast GUP entry points: get_user_pages_fast(), pin_user_pages_fast(), and the related pin_user_pages*() variants, all funnelling into internal_get_user_pages_fast().
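
      For orientation, a hedged sketch of a typical driver-style caller of these entry points (pin_user_buffer() is a made-up name; only the calls that appear in the excerpt are real API):

        #include <linux/mm.h>
        #include <linux/slab.h>

        /* Sketch: pin a user buffer, use the pages, then release the pins.
         * Error handling is kept minimal on purpose. */
        static int pin_user_buffer(unsigned long uaddr, int nr_pages)
        {
                struct page **pages;
                int pinned, i;

                pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
                if (!pages)
                        return -ENOMEM;

                /* FOLL_WRITE: the buffer will be written to (e.g. by a device) */
                pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
                if (pinned < 0) {
                        kfree(pages);
                        return pinned;
                }

                for (i = 0; i < pinned; i++) {
                        /* ... hand pages[i] to the actual I/O here ... */
                }

                /* FOLL_PIN pages must be released with unpin_user_pages() */
                unpin_user_pages(pages, pinned);
                kfree(pages);
                return pinned == nr_pages ? pinned : -EFAULT;
        }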

    3. /** * unpin_user_pages() - release an array of gup-pinned pages. * @pages: array of pages to be marked dirty and released. * @npages: number of pages in the @pages array. * * For each page in the @pages array, release the page using unpin_user_page(). * * Please see the unpin_user_page() documentation for details. */ void unpin_user_pages(struct page **pages, unsigned long npages) { unsigned long i; struct folio *folio; unsigned int nr; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by * leaving them pinned), but probably not. More likely, gup/pup returned * a hard -ERRNO error to the caller, who erroneously passed it here. */ if (WARN_ON(IS_ERR_VALUE(npages))) return; sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); gup_put_folio(folio, nr, FOLL_PIN); } }

      GUP unpin function (unpin_user_pages()): a thin wrapper that batches pages per folio, not the actual pin/unpin accounting logic.

    4. void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty) { unsigned long i; struct folio *folio; unsigned int nr; for (i = 0; i < npages; i += nr) { folio = gup_folio_range_next(page, npages, i, &nr); if (make_dirty && !folio_test_dirty(folio)) { folio_lock(folio); folio_mark_dirty(folio); folio_unlock(folio); } gup_put_folio(folio, nr, FOLL_PIN); } }

      Unpin logic, but for a contiguous page range and with an option to mark each folio dirty before releasing the pin.
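
      A hedged usage sketch (kernel context; first_page and npages stand for whatever an earlier pin produced):

        /* The device DMA'd into a pinned, physically contiguous range starting
         * at first_page, so mark it dirty while unpinning. */
        unpin_user_page_range_dirty_lock(first_page, npages, true);

        /* For a read-only transfer, pass make_dirty == false and it only unpins. */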

    5. #ifdef CONFIG_MIGRATION /* * Returns the number of collected pages. Return value is always >= 0. */ static unsigned long collect_longterm_unpinnable_pages( struct list_head *movable_page_list, unsigned long nr_pages, struct page **pages) { unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; for (i = 0; i < nr_pages; i++) { struct folio *folio = page_folio(pages[i]); if (folio == prev_folio) continue; prev_folio = folio; if (folio_is_longterm_pinnable(folio)) continue; collected++; if (folio_is_device_coherent(folio)) continue; if (folio_test_hugetlb(folio)) { isolate_hugetlb(folio, movable_page_list); continue; } if (!folio_test_lru(folio) && drain_allow) { lru_add_drain_all(); drain_allow = false; } if (!folio_isolate_lru(folio)) continue; list_add_tail(&folio->lru, movable_page_list); node_stat_mod_folio(folio, NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } return collected; }
    6. int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) { struct mm_struct *mm = current->mm; unsigned long end, nstart, nend; struct vm_area_struct *vma = NULL; int locked = 0; long ret = 0; end = start + len; for (nstart = start; nstart < end; nstart = nend) { /* * We want to fault in pages for [nstart; end) address range. * Find first corresponding VMA. */ if (!locked) { locked = 1; mmap_read_lock(mm); vma = find_vma_intersection(mm, nstart, end); } else if (nstart >= vma->vm_end) vma = find_vma_intersection(mm, vma->vm_end, end); if (!vma) break; /* * Set [nstart; nend) to intersection of desired address * range with the first VMA. Also, skip undesirable VMA types. */ nend = min(end, vma->vm_end); if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; /* * Now fault in a range of pages. populate_vma_page_range() * double checks the vma flags, so that it won't mlock pages * if the vma was already munlocked. */ ret = populate_vma_page_range(vma, nstart, nend, &locked); if (ret < 0) { if (ignore_errors) { ret = 0; continue; /* continue at next VMA */ } break; } nend = nstart + ret * PAGE_SIZE; ret = 0; } if (locked) mmap_read_unlock(mm); return ret; /* 0 or negative error code */ }

      Policy-use function that faults in (populates) pages for a range, like populate_vma_page_range() just before it in the file, which it calls once per intersecting VMA.
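
      The shape of the walk, stripped down (a skeleton only; variable declarations are omitted and the real code also re-takes mmap_lock if populate_vma_page_range() dropped it, and re-uses the previous VMA instead of re-searching):

        mmap_read_lock(mm);
        for (nstart = start; nstart < end; nstart = nend) {
                vma = find_vma_intersection(mm, nstart, end);
                if (!vma)
                        break;
                nend = min(end, vma->vm_end);
                if (vma->vm_flags & (VM_IO | VM_PFNMAP))
                        continue;                       /* can't populate these */
                if (nstart < vma->vm_start)
                        nstart = vma->vm_start;
                ret = populate_vma_page_range(vma, nstart, nend, &locked);
                if (ret < 0)
                        break;                          /* or continue, if ignoring errors */
                nend = nstart + ret * PAGE_SIZE;        /* resume after what got faulted in */
        }
        mmap_read_unlock(mm);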

    7. long populate_vma_page_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, int *locked) { struct mm_struct *mm = vma->vm_mm; unsigned long nr_pages = (end - start) / PAGE_SIZE; int local_locked = 1; int gup_flags; long ret; VM_BUG_ON(!PAGE_ALIGNED(start)); VM_BUG_ON(!PAGE_ALIGNED(end)); VM_BUG_ON_VMA(start < vma->vm_start, vma); VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); /* * Rightly or wrongly, the VM_LOCKONFAULT case has never used * faultin_page() to break COW, so it has no work to do here. */ if (vma->vm_flags & VM_LOCKONFAULT) return nr_pages; gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW * and we would not want to dirty them for nothing. */ if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) gup_flags |= FOLL_WRITE; /* * We want mlock to succeed for regions that have any permissions * other than PROT_NONE. */ if (vma_is_accessible(vma)) gup_flags |= FOLL_FORCE; if (locked) gup_flags |= FOLL_UNLOCKABLE; /* * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ ret = __get_user_pages(mm, start, nr_pages, gup_flags, NULL, locked ? locked : &local_locked); lru_add_drain(); return ret; }

      Policy-use code: chooses the gup_flags (FOLL_TOUCH, FOLL_WRITE to break COW on private writable mappings, FOLL_FORCE, FOLL_UNLOCKABLE) from the VMA flags before handing off to __get_user_pages().
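
      The flag decision isolated in one place, as a sketch (flags_for_populate() is a made-up name, and FOLL_UNLOCKABLE is internal to mm/, so this wouldn't build outside it):

        /* Hypothetical helper isolating the gup_flags choice made above. */
        static unsigned int flags_for_populate(struct vm_area_struct *vma, bool unlockable)
        {
                unsigned int gup_flags = FOLL_TOUCH;

                /* Private writable mapping: write-fault now to break COW.
                 * Shared mappings don't COW, so don't dirty them for nothing. */
                if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
                        gup_flags |= FOLL_WRITE;

                /* mlock should succeed for anything that isn't PROT_NONE */
                if (vma_is_accessible(vma))
                        gup_flags |= FOLL_FORCE;

                if (unlockable)
                        gup_flags |= FOLL_UNLOCKABLE;

                return gup_flags;
        }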

    8. #ifdef CONFIG_STACK_GROWSUP return vma_lookup(mm, addr); #else static volatile unsigned long next_warn; struct vm_area_struct *vma; unsigned long now, next; vma = find_vma(mm, addr); if (!vma || (addr >= vma->vm_start)) return vma; /* Only warn for half-way relevant accesses */ if (!(vma->vm_flags & VM_GROWSDOWN)) return NULL; if (vma->vm_start - addr > 65536) return NULL; /* Let's not warn more than once an hour.. */ now = jiffies; next = next_warn; if (next && time_before(now, next)) return NULL; next_warn = now + 60*60*HZ; /* Let people know things may have changed. */ pr_warn("GUP no longer grows the stack in %s (%d): %lx-%lx (%lx)\n", current->comm, task_pid_nr(current), vma->vm_start, vma->vm_end, addr); dump_stack(); return NULL;

      Helper func to look up the VMA (virtual memory area) for an address; for "half-way relevant" accesses just below a grows-down stack VMA it warns, at most once an hour, that GUP no longer grows the stack.
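
      The once-per-hour throttling is a standard jiffies rate-limit idiom; a hedged standalone sketch of just that idiom (kernel context, warn_at_most_hourly() is a made-up name):

        #include <linux/jiffies.h>
        #include <linux/printk.h>

        /* Sketch: remember the earliest time the next warning is allowed and
         * stay silent until jiffies passes it. The racy store is fine for a
         * best-effort warning, as the comment in the real code implies. */
        static void warn_at_most_hourly(const char *what)
        {
                static volatile unsigned long next_warn;
                unsigned long now = jiffies, next = next_warn;

                if (next && time_before(now, next))
                        return;                         /* still in the quiet period */

                next_warn = now + 60 * 60 * HZ;         /* at most one warning per hour */
                pr_warn("rate-limited warning: %s\n", what);
        }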

    9. if (*flags & FOLL_NOFAULT) return -EFAULT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) fault_flags |= FAULT_FLAG_REMOTE; if (*flags & FOLL_UNLOCKABLE) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set * FOLL_INTERRUPTIBLE to enable FAULT_FLAG_INTERRUPTIBLE. * That's because some callers may not be prepared to * handle early exits caused by non-fatal signals. */ if (*flags & FOLL_INTERRUPTIBLE) fault_flags |= FAULT_FLAG_INTERRUPTIBLE; } if (*flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; if (*flags & FOLL_TRIED) { /* * Note: FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_TRIED * can co-exist */ fault_flags |= FAULT_FLAG_TRIED; } if (unshare) { fault_flags |= FAULT_FLAG_UNSHARE; /* FAULT_FLAG_WRITE and FAULT_FLAG_UNSHARE are incompatible */ VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE); } ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_COMPLETED) { /* * With FAULT_FLAG_RETRY_NOWAIT we'll never release the * mmap lock in the page fault handler. Sanity check this. */ WARN_ON_ONCE(fault_flags & FAULT_FLAG_RETRY_NOWAIT); *locked = 0; /* * We should do the same as VM_FAULT_RETRY, but let's not * return -EBUSY since that's not reflecting the reality of * what has happened - we've just fully completed a page * fault, with the mmap lock released. Use -EAGAIN to show * that we want to take the mmap lock _again_. */ return -EAGAIN; } if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); if (err) return err; BUG(); } if (ret & VM_FAULT_RETRY) { if (!(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; return -EBUSY; }

      Seems to just translate the FOLL_* flags from the flags param into FAULT_FLAG_* page-fault flags, call handle_mm_fault(), and map the fault result back to a return code (-EAGAIN, -EBUSY, or an errno).

    10. if (likely(!pmd_trans_huge(pmdval))) return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags)) return no_page_table(vma, flags); ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_present(*pmd))) { spin_unlock(ptl); return no_page_table(vma, flags); } if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); }

      Branch-prediction hints (likely/unlikely) around the checks for whether the PMD is present and whether it is a transparent huge PMD; the non-huge case falls back to follow_page_pte(), and the huge case is re-checked under the PMD lock.
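
      Condensed shape of that check-then-lock-then-recheck dance, with comments (a sketch; the lockless snapshot read of pmdval isn't in the selection, something like pmdp_get_lockless() in the full function):

        pmdval = pmdp_get_lockless(pmd);        /* unlocked snapshot */
        if (likely(!pmd_trans_huge(pmdval)))
                /* common case: not a huge PMD, walk the PTE level instead */
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);

        ptl = pmd_lock(mm, pmd);                /* re-check under the PMD lock */
        if (unlikely(!pmd_present(*pmd))) {
                spin_unlock(ptl);
                return no_page_table(vma, flags);       /* zapped meanwhile */
        }
        if (unlikely(!pmd_trans_huge(*pmd))) {
                spin_unlock(ptl);                       /* split meanwhile */
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
        }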

    11. if (pmd_none(pmdval)) return no_page_table(vma, flags); if (!pmd_present(pmdval)) return no_page_table(vma, flags); if (pmd_devmap(pmdval)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap); spin_unlock(ptl); if (page) return page; }

      Checks whether the PMD is there (none/not present); I'm assuming PMD is the page middle directory. Also handles a devmap PMD under the PMD lock via follow_devmap_pmd().

    12. void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty) { unsigned long i; struct folio *folio; unsigned int nr; if (!make_dirty) { unpin_user_pages(pages, npages); return; } sanity_check_pinned_pages(pages, npages); for (i = 0; i < npages; i += nr) { folio = gup_folio_next(pages, npages, i, &nr); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key * cases: * * 1) This code sees the page as already dirty, so it * skips the call to set_page_dirty(). That could happen * because clear_page_dirty_for_io() called * page_mkclean(), followed by set_page_dirty(). * However, now the page is going to get written back, * which meets the original intention of setting it * dirty, so all is well: clear_page_dirty_for_io() goes * on to call TestClearPageDirty(), and write the page * back. * * 2) This code sees the page as clean, so it calls * set_page_dirty(). The page stays dirty, despite being * written back, so it gets written back again in the * next writeback cycle. This is harmless. */ if (!folio_test_dirty(folio)) { folio_lock(folio); folio_mark_dirty(folio); folio_unlock(folio); } gup_put_folio(folio, nr, FOLL_PIN); } }

      Unpins the pages, marking each folio dirty first unless it already is (the long comment explains why racing with clear_page_dirty_for_io() is harmless).

    13. static inline struct folio *gup_folio_range_next(struct page *start, unsigned long npages, unsigned long i, unsigned int *ntails) { struct page *next = nth_page(start, i); struct folio *folio = page_folio(next); unsigned int nr = 1; if (folio_test_large(folio)) nr = min_t(unsigned int, npages - i, folio_nr_pages(folio) - folio_page_idx(folio, next)); *ntails = nr; return folio; }

      Gets the folio containing the page at offset i from start, and also returns (via *ntails) how many of the remaining pages fall in that same folio, so the caller can batch the release.
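
      A worked example (numbers made up) of how *ntails comes out:

        /*
         * Say the page at offset i lands 3 pages into a 2 MiB folio
         * (folio_nr_pages() == 512) and the caller still has
         * npages - i == 1000 pages to process:
         *
         *   folio_page_idx(folio, next)              == 3
         *   folio_nr_pages(folio) - 3                == 509   (left in this folio)
         *   min(npages - i, 509) == min(1000, 509)   == 509   -> *ntails = 509
         *
         * So the loop in unpin_user_page_range_dirty_lock() above advances i
         * by 509 and drops all of those pins with one gup_put_folio() call.
         */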

    14. else if (flags & FOLL_PIN) { /* * Don't take a pin on the zero page - it's not going anywhere * and it is used in a *lot* of places. */ if (is_zero_page(page)) return 0; /* * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, * so that the page really is pinned. */ if (folio_test_large(folio)) { folio_ref_add(folio, 1); atomic_add(1, &folio->_pincount); } else { folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); } node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); }

      Logic that actually takes the pin on the folio: skip the zero page, then for large folios bump _pincount (plus one normal reference), and for small folios add GUP_PIN_COUNTING_BIAS to the refcount. Policy-use code, not actual policy.
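
      The counting scheme this implements, sketched below. GUP_PIN_COUNTING_BIAS is 1024 in current kernels and the real detection helper is folio_maybe_dma_pinned(); looks_dma_pinned() is just an illustrative name for roughly the same check:

        /*
         * FOLL_PIN accounting done by the branch above:
         *   large folio:  folio->_pincount += 1  (plus one normal reference)
         *   small folio:  refcount += GUP_PIN_COUNTING_BIAS (1024)
         * which lets later code ask "is this folio DMA-pinned?" without a
         * dedicated per-page flag:
         */
        static inline bool looks_dma_pinned(struct folio *folio)
        {
                if (folio_test_large(folio))
                        return atomic_read(&folio->_pincount) > 0;
                /* small folios: a heuristic that can false-positive if the
                 * plain refcount happens to be >= 1024 */
                return folio_ref_count(folio) >= GUP_PIN_COUNTING_BIAS;
        }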

    15. if (is_zero_page(page) || !folio_test_anon(folio)) continue; if (!folio_test_large(folio) || folio_test_hugetlb(folio))

      Sanity checks for pinned pages. I wouldn't classify this as policy logic, just common-sense pre-checks for the actual policy, but I think it's worth tagging to get a sense of what is not policy code.