24 hotfixes. 8 marked cc:stable and 16 for post-6.0 issues.

There have been a lot of hotfixes this cycle, and this is quite a large
 batch given how far we are into the -rc cycle.  Presumably a reflection of
 the unusually large amount of MM material which went into 6.1-rc1.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCY4Bd6gAKCRDdBJ7gKXxA
 jvX6AQCsG1ld24kMpdD+70XXUyC29g/6/jribgtZApHyDYjxSwD/WmLNpPlUPRax
 WB071Y5w65vjSTUKvwU0OLGbHwyxgAw=
 =swD5
 -----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2022-11-24' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull hotfixes from Andrew Morton:
 "24 MM and non-MM hotfixes. 8 marked cc:stable and 16 for post-6.0
  issues.

  There have been a lot of hotfixes this cycle, and this is quite a
  large batch given how far we are into the -rc cycle. Presumably a
  reflection of the unusually large amount of MM material which went
  into 6.1-rc1"

* tag 'mm-hotfixes-stable-2022-11-24' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (24 commits)
  test_kprobes: fix implicit declaration error of test_kprobes
  nilfs2: fix nilfs_sufile_mark_dirty() not set segment usage as dirty
  mm/cgroup/reclaim: fix dirty pages throttling on cgroup v1
  mm: fix unexpected changes to {failslab|fail_page_alloc}.attr
  swapfile: fix soft lockup in scan_swap_map_slots
  hugetlb: fix __prep_compound_gigantic_page page flag setting
  kfence: fix stack trace pruning
  proc/meminfo: fix spacing in SecPageTables
  mm: multi-gen LRU: retry folios written back while isolated
  mailmap: update email address for Satya Priya
  mm/migrate_device: return number of migrating pages in args->cpages
  kbuild: fix -Wimplicit-function-declaration in license_is_gpl_compatible
  MAINTAINERS: update Alex Hung's email address
  mailmap: update Alex Hung's email address
  mm: mmap: fix documentation for vma_mas_szero
  mm/damon/sysfs-schemes: skip stats update if the scheme directory is removed
  mm/memory: return vm_fault_t result from migrate_to_ram() callback
  mm: correctly charge compressed memory to its memcg
  ipc/shm: call underlying open/close vm_ops
  gcov: clang: fix the buffer overflow issue
  ...
This commit is contained in:
Linus Torvalds 2022-11-25 10:18:25 -08:00
commit 0b1dcc2cf5
24 changed files with 173 additions and 79 deletions

View file

@ -29,6 +29,7 @@ Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electr
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com>
@ -382,6 +383,7 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org>
Santosh Shilimkar <ssantosh@kernel.org>
Sarangdhar Joshi <spjoshi@codeaurora.org>
Sascha Hauer <s.hauer@pengutronix.de>
Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org>
S.Çağlar Onur <caglar@pardus.org.tr>
Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>

View file

@ -10287,7 +10287,7 @@ T: git https://github.com/intel/gvt-linux.git
F: drivers/gpu/drm/i915/gvt/
INTEL HID EVENT DRIVER
M: Alex Hung <alex.hung@canonical.com>
M: Alex Hung <alexhung@gmail.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/intel/hid.c

View file

@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
/*
 * nilfs_sufile_mark_dirty - mark the usage entry of a segment as dirty
 * @sufile: inode of the segment usage file
 * @segnum: number of the segment whose usage entry is updated
 *
 * With the sufile's mi_sem held for write, look up the buffer holding the
 * usage entry of @segnum, mark that buffer and the sufile metadata dirty,
 * and set the dirty state on the usage entry itself.
 *
 * Returns the result of nilfs_sufile_get_segment_usage_block(): zero on
 * success; on a non-zero result nothing is marked dirty.
 */
int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
{
	struct nilfs_segment_usage *usage;
	struct buffer_head *su_bh;
	void *mapped;
	int err;

	down_write(&NILFS_MDT(sufile)->mi_sem);

	err = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
	if (err)
		goto out_unlock;

	mark_buffer_dirty(su_bh);
	nilfs_mdt_mark_dirty(sufile);

	/* Map the buffer's page so the usage entry can be updated in place. */
	mapped = kmap_atomic(su_bh->b_page);
	usage = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, mapped);
	nilfs_segment_usage_set_dirty(usage);
	kunmap_atomic(mapped);

	brelse(su_bh);

out_unlock:
	up_write(&NILFS_MDT(sufile)->mi_sem);
	return err;
}

View file

@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#endif
show_val_kb(m, "PageTables: ",
global_node_page_state(NR_PAGETABLE));
show_val_kb(m, "SecPageTables: ",
show_val_kb(m, "SecPageTables: ",
global_node_page_state(NR_SECONDARY_PAGETABLE));
show_val_kb(m, "NFS_Unstable: ", 0);

View file

@ -20,7 +20,6 @@ struct fault_attr {
atomic_t space;
unsigned long verbose;
bool task_filter;
bool no_warn;
unsigned long stacktrace_depth;
unsigned long require_start;
unsigned long require_end;
@ -32,6 +31,10 @@ struct fault_attr {
struct dentry *dname;
};
/* Per-call flags accepted by should_fail_ex(). */
enum fault_flags {
	/* Skip the fail_dump() report for this injected failure. */
	FAULT_NOWARN = (1 << 0),
};
#define FAULT_ATTR_INITIALIZER { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
@ -40,11 +43,11 @@ struct fault_attr {
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
.no_warn = false, \
}
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
int setup_fault_attr(struct fault_attr *attr, char *str);
bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

View file

@ -2,6 +2,8 @@
#ifndef __LICENSE_H
#define __LICENSE_H
#include <linux/string.h>
static inline int license_is_gpl_compatible(const char *license)
{
return (strcmp(license, "GPL") == 0

View file

@ -171,15 +171,15 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
TRACE_EVENT(mm_khugepaged_scan_file,
TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename,
TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file,
int present, int swap, int result),
TP_ARGS(mm, page, filename, present, swap, result),
TP_ARGS(mm, page, file, present, swap, result),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, pfn)
__string(filename, filename)
__string(filename, file->f_path.dentry->d_iname)
__field(int, present)
__field(int, swap)
__field(int, result)
@ -188,7 +188,7 @@ TRACE_EVENT(mm_khugepaged_scan_file,
TP_fast_assign(
__entry->mm = mm;
__entry->pfn = page ? page_to_pfn(page) : -1;
__assign_str(filename, filename);
__assign_str(filename, file->f_path.dentry->d_iname);
__entry->present = present;
__entry->swap = swap;
__entry->result = result;

View file

@ -275,10 +275,8 @@ static inline void shm_rmid(struct shmid_kernel *s)
}
static int __shm_open(struct vm_area_struct *vma)
static int __shm_open(struct shm_file_data *sfd)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
struct shmid_kernel *shp;
shp = shm_lock(sfd->ns, sfd->id);
@ -302,7 +300,15 @@ static int __shm_open(struct vm_area_struct *vma)
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
int err = __shm_open(vma);
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
int err;
/* Always call underlying open if present */
if (sfd->vm_ops->open)
sfd->vm_ops->open(vma);
err = __shm_open(sfd);
/*
* We raced in the idr lookup or with shm_destroy().
* Either way, the ID is busted.
@ -359,10 +365,8 @@ static bool shm_may_destroy(struct shmid_kernel *shp)
* The descriptor has already been removed from the current->mm->mmap list
* and will later be kfree()d.
*/
static void shm_close(struct vm_area_struct *vma)
static void __shm_close(struct shm_file_data *sfd)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
struct shmid_kernel *shp;
struct ipc_namespace *ns = sfd->ns;
@ -388,6 +392,18 @@ static void shm_close(struct vm_area_struct *vma)
up_write(&shm_ids(ns).rwsem);
}
static void shm_close(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
/* Always call underlying close if present */
if (sfd->vm_ops->close)
sfd->vm_ops->close(vma);
__shm_close(sfd);
}
/* Called with ns->shm_ids(ns).rwsem locked */
static int shm_try_destroy_orphaned(int id, void *p, void *data)
{
@ -583,13 +599,13 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
* IPC ID that was removed, and possibly even reused by another shm
* segment already. Propagate this case as an error to caller.
*/
ret = __shm_open(vma);
ret = __shm_open(sfd);
if (ret)
return ret;
ret = call_mmap(sfd->file, vma);
if (ret) {
shm_close(vma);
__shm_close(sfd);
return ret;
}
sfd->vm_ops = vma->vm_ops;

View file

@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src)
for (i = 0; i < sfn_ptr->num_counters; i++)
dfn_ptr->counters[i] += sfn_ptr->counters[i];
sfn_ptr = list_next_entry(sfn_ptr, head);
}
}

View file

@ -2107,6 +2107,7 @@ config KPROBES_SANITY_TEST
depends on DEBUG_KERNEL
depends on KPROBES
depends on KUNIT
select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
default KUNIT_ALL_TESTS
help
This option provides for testing basic kprobes functionality on

View file

@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr)
{
if (attr->no_warn)
return;
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
"name %pd, interval %lu, probability %lu, "
@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
* http://www.nongnu.org/failmalloc/
*/
bool should_fail(struct fault_attr *attr, ssize_t size)
bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
{
if (in_task()) {
unsigned int fail_nth = READ_ONCE(current->fail_nth);
@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;
fail:
fail_dump(attr);
if (!(flags & FAULT_NOWARN))
fail_dump(attr);
if (atomic_read(&attr->times) != -1)
atomic_dec_not_zero(&attr->times);
return true;
}
/*
 * should_fail - should_fail_ex() with no fault_flags set
 * @attr: fault attributes to evaluate
 * @size: size passed through to should_fail_ex()
 *
 * Returns whatever should_fail_ex() returns for @attr and @size. Because
 * no flag is passed, FAULT_NOWARN is never set for calls made through
 * this wrapper.
 */
bool should_fail(struct fault_attr *attr, ssize_t size)
{
	const int no_flags = 0;

	return should_fail_ex(attr, size, no_flags);
}
EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

View file

@ -2339,6 +2339,10 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
damon_for_each_scheme(scheme, ctx) {
struct damon_sysfs_stats *sysfs_stats;
/* user could have removed the scheme sysfs dir */
if (schemes_idx >= sysfs_schemes->nr)
break;
sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
sysfs_stats->nr_tried = scheme->stat.nr_tried;
sysfs_stats->sz_tried = scheme->stat.sz_tried;

View file

@ -16,6 +16,8 @@ static struct {
bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
int flags = 0;
/* No fault-injection for bootstrap cache */
if (unlikely(s == kmem_cache))
return false;
@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
return false;
/*
* Some callers pass __GFP_NOWARN expecting it to suppress
* all output (not just warnings), thereby avoiding
* deadlocks. See commit 6b9dbedbe349 for details.
*/
if (gfpflags & __GFP_NOWARN)
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
return should_fail(&failslab.attr, s->object_size);
return should_fail_ex(&failslab.attr, s->object_size, flags);
}
static int __init setup_failslab(char *str)

View file

@ -1800,6 +1800,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
/* we rely on prep_new_huge_page to set the destructor */
set_compound_order(page, order);
__ClearPageReserved(page);
__SetPageHead(page);
for (i = 0; i < nr_pages; i++) {
p = nth_page(page, i);
@ -1816,7 +1817,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
* on the head page when they need know if put_page() is needed
* after get_user_pages().
*/
__ClearPageReserved(p);
if (i != 0) /* head page cleared above */
__ClearPageReserved(p);
/*
* Subtle and very unlikely
*

View file

@ -75,18 +75,23 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
!strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
/*
* In case of tail calls from any of the below
* to any of the above.
* In case of tail calls from any of the below to any of
* the above, optimized by the compiler such that the
* stack trace would omit the initial entry point below.
*/
fallback = skipnr + 1;
}
/* Also the *_bulk() variants by only checking prefixes. */
/*
* The below list should only include the initial entry points
* into the slab allocators. Includes the *_bulk() variants by
* checking prefixes.
*/
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
goto found;

View file

@ -97,8 +97,8 @@ struct collapse_control {
/* Num pages scanned per node */
u32 node_load[MAX_NUMNODES];
/* Last target selected in hpage_collapse_find_target_node() */
int last_target_node;
/* nodemask for allocation fallback */
nodemask_t alloc_nmask;
};
/**
@ -734,7 +734,6 @@ static void khugepaged_alloc_sleep(void)
struct collapse_control khugepaged_collapse_control = {
.is_khugepaged = true,
.last_target_node = NUMA_NO_NODE,
};
static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
@ -783,16 +782,11 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
target_node = nid;
}
/* do some balance if several nodes have the same hit record */
if (target_node <= cc->last_target_node)
for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
nid++)
if (max_value == cc->node_load[nid]) {
target_node = nid;
break;
}
for_each_online_node(nid) {
if (max_value == cc->node_load[nid])
node_set(nid, cc->alloc_nmask);
}
cc->last_target_node = target_node;
return target_node;
}
#else
@ -802,9 +796,10 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
}
#endif
static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node)
static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node,
nodemask_t *nmask)
{
*hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
*hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
return false;
@ -955,12 +950,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
struct collapse_control *cc)
{
/* Only allocate from the target node */
gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
GFP_TRANSHUGE) | __GFP_THISNODE;
GFP_TRANSHUGE);
int node = hpage_collapse_find_target_node(cc);
if (!hpage_collapse_alloc_page(hpage, gfp, node))
if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask))
return SCAN_ALLOC_HUGE_PAGE_FAIL;
if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
return SCAN_CGROUP_CHARGE_FAIL;
@ -1144,6 +1138,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
goto out;
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, _address += PAGE_SIZE) {
@ -2077,6 +2072,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
present = 0;
swap = 0;
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
rcu_read_lock();
xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
if (xas_retry(&xas, page))
@ -2157,8 +2153,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
}
}
trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname,
present, swap, result);
trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result);
return result;
}
#else
@ -2576,7 +2571,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
if (!cc)
return -ENOMEM;
cc->is_khugepaged = false;
cc->last_target_node = NUMA_NO_NODE;
mmgrab(mm);
lru_add_drain_all();
@ -2602,6 +2596,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
}
mmap_assert_locked(mm);
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) {
struct file *file = get_file(vma->vm_file);
pgoff_t pgoff = linear_page_index(vma, addr);

View file

@ -3026,7 +3026,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
{
struct obj_cgroup *objcg;
if (!memcg_kmem_enabled() || memcg_kmem_bypass())
if (!memcg_kmem_enabled())
return NULL;
if (PageMemcgKmem(page)) {

View file

@ -3763,7 +3763,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
*/
get_page(vmf->page);
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->page->pgmap->ops->migrate_to_ram(vmf);
ret = vmf->page->pgmap->ops->migrate_to_ram(vmf);
put_page(vmf->page);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;

View file

@ -357,7 +357,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
}
/*
* Unmaps pages for migration. Returns number of unmapped pages.
* Unmaps pages for migration. Returns number of source pfns marked as
* migrating.
*/
static unsigned long migrate_device_unmap(unsigned long *src_pfns,
unsigned long npages,
@ -373,8 +374,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct folio *folio;
if (!page)
if (!page) {
if (src_pfns[i] & MIGRATE_PFN_MIGRATE)
unmapped++;
continue;
}
/* ZONE_DEVICE pages are not on LRU */
if (!is_zone_device_page(page)) {

View file

@ -456,7 +456,7 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
* vma_mas_szero() - Set a given range to zero. Used when modifying a
* vm_area_struct start or end.
*
* @mm: The struct_mm
* @mas: The maple tree ma_state
* @start: The start address to zero
* @end: The end address to zero.
*/

View file

@ -3887,6 +3887,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
{
int flags = 0;
if (order < fail_page_alloc.min_order)
return false;
if (gfp_mask & __GFP_NOFAIL)
@ -3897,10 +3899,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
(gfp_mask & __GFP_DIRECT_RECLAIM))
return false;
/* See comment in __should_failslab() */
if (gfp_mask & __GFP_NOWARN)
fail_page_alloc.attr.no_warn = true;
flags |= FAULT_NOWARN;
return should_fail(&fail_page_alloc.attr, 1 << order);
return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
}
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS

View file

@ -166,7 +166,7 @@ struct page_ext *page_ext_get(struct page *page)
/**
* page_ext_put() - Working with page extended information is done.
* @page_ext - Page extended information received from page_ext_get().
* @page_ext: Page extended information received from page_ext_get().
*
* The page extended information of the page may not be valid after this
* function is called.

View file

@ -973,23 +973,23 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
scan:
spin_unlock(&si->lock);
while (++offset <= READ_ONCE(si->highest_bit)) {
if (swap_offset_available_and_locked(si, offset))
goto checks;
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
scanned_many = true;
}
if (swap_offset_available_and_locked(si, offset))
goto checks;
}
offset = si->lowest_bit;
while (offset < scan_base) {
if (swap_offset_available_and_locked(si, offset))
goto checks;
if (unlikely(--latency_ration < 0)) {
cond_resched();
latency_ration = LATENCY_LIMIT;
scanned_many = true;
}
if (swap_offset_available_and_locked(si, offset))
goto checks;
offset++;
}
spin_lock(&si->lock);

View file

@ -2514,8 +2514,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
* the flushers simply cannot keep up with the allocation
* rate. Nudge the flusher threads in case they are asleep.
*/
if (stat.nr_unqueued_dirty == nr_taken)
if (stat.nr_unqueued_dirty == nr_taken) {
wakeup_flusher_threads(WB_REASON_VMSCAN);
/*
* For cgroupv1 dirty throttling is achieved by waking up
* the kernel flusher here and later waiting on folios
* which are in writeback to finish (see shrink_folio_list()).
*
* Flusher may not be able to issue writeback quickly
* enough for cgroupv1 writeback throttling to work
* on a large system.
*/
if (!writeback_throttling_sane(sc))
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
}
sc->nr.dirty += stat.nr_dirty;
sc->nr.congested += stat.nr_congested;
@ -4971,10 +4983,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
int scanned;
int reclaimed;
LIST_HEAD(list);
LIST_HEAD(clean);
struct folio *folio;
struct folio *next;
enum vm_event_item item;
struct reclaim_stat stat;
struct lru_gen_mm_walk *walk;
bool skip_retry = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@ -4991,20 +5006,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
if (list_empty(&list))
return scanned;
retry:
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false);
sc->nr_reclaimed += reclaimed;
list_for_each_entry(folio, &list, lru) {
/* restore LRU_REFS_FLAGS cleared by isolate_folio() */
if (folio_test_workingset(folio))
folio_set_referenced(folio);
list_for_each_entry_safe_reverse(folio, next, &list, lru) {
if (!folio_evictable(folio)) {
list_del(&folio->lru);
folio_putback_lru(folio);
continue;
}
/* don't add rejected pages to the oldest generation */
if (folio_test_reclaim(folio) &&
(folio_test_dirty(folio) || folio_test_writeback(folio)))
folio_clear_active(folio);
else
folio_set_active(folio);
(folio_test_dirty(folio) || folio_test_writeback(folio))) {
/* restore LRU_REFS_FLAGS cleared by isolate_folio() */
if (folio_test_workingset(folio))
folio_set_referenced(folio);
continue;
}
if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) ||
folio_mapped(folio) || folio_test_locked(folio) ||
folio_test_dirty(folio) || folio_test_writeback(folio)) {
/* don't add rejected folios to the oldest generation */
set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS,
BIT(PG_active));
continue;
}
/* retry folios that may have missed folio_rotate_reclaimable() */
list_move(&folio->lru, &clean);
sc->nr_scanned -= folio_nr_pages(folio);
}
spin_lock_irq(&lruvec->lru_lock);
@ -5026,7 +5058,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
mem_cgroup_uncharge_list(&list);
free_unref_page_list(&list);
sc->nr_reclaimed += reclaimed;
INIT_LIST_HEAD(&list);
list_splice_init(&clean, &list);
if (!list_empty(&list)) {
skip_retry = true;
goto retry;
}
if (need_swapping && type == LRU_GEN_ANON)
*need_swapping = true;
@ -5844,8 +5882,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
enum lru_list lru;
unsigned long nr_reclaimed = 0;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
bool proportional_reclaim;
struct blk_plug plug;
bool scan_adjusted;
if (lru_gen_enabled()) {
lru_gen_shrink_lruvec(lruvec, sc);
@ -5868,8 +5906,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
* abort proportional reclaim if either the file or anon lru has already
* dropped to zero at the first pass.
*/
scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@ -5889,7 +5927,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
cond_resched();
if (nr_reclaimed < nr_to_reclaim || scan_adjusted)
if (nr_reclaimed < nr_to_reclaim || proportional_reclaim)
continue;
/*
@ -5940,8 +5978,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
scan_adjusted = true;
}
blk_finish_plug(&plug);
sc->nr_reclaimed += nr_reclaimed;