Revert "mm: process reclaim: vmpressure based process reclaim"

- This reverts commit 7964b3ce47f0d87fbbb1cfdd1fb4aadb620133dd, as QCOM's vmpressure-driven process reclaim is redundant alongside Linux PPR, which already meets the userspace dependencies.

Change-Id: I46782f69c57febed99002681ee268fa4a3111d59
Signed-off-by: Cyber Knight <cyberknight755@gmail.com>
Signed-off-by: Richard Raya <rdxzv.dev@gmail.com>
Authored by John Galt on 2022-11-28 13:34:19 -05:00; committed by Richard Raya
parent 06425a87ef
commit 12290e8a8b
5 changed files with 8 additions and 418 deletions
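
Note: the revert keeps the /proc/<pid>/reclaim write interface (reclaim_write() in the first file below) and removes only the kernel-driven vmpressure path, so reclaim of individual processes stays userspace-initiated. A minimal userspace sketch of that usage follows; the accepted tokens ("file", "anon", "all", and an address/size range) and the exact proc path are assumptions inferred from the retained reclaim_write()/RECLAIM_* parser rather than something this diff shows in full, so verify them against the running kernel.

/* Hypothetical helper: trigger per-process reclaim from userspace. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

static int process_reclaim(pid_t pid, const char *what)
{
	char path[64];
	FILE *f;

	/* e.g. /proc/1234/reclaim (assumed path of the retained interface) */
	snprintf(path, sizeof(path), "/proc/%d/reclaim", (int)pid);

	f = fopen(path, "w");
	if (!f)
		return -errno;

	/* "file", "anon", "all", or "<start> <size-in-bytes>" (assumed tokens) */
	if (fputs(what, f) == EOF) {
		fclose(f);
		return -EIO;
	}

	fclose(f);
	return 0;
}

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <pid> <file|anon|all>\n", argv[0]);
		return 1;
	}

	return process_reclaim((pid_t)atoi(argv[1]), argv[2]) ? 1 : 0;
}

Such a write only walks the target task's VMAs via reclaim_pte_range(), which is exactly the machinery this revert leaves in place.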

View File

@@ -1849,17 +1849,15 @@ const struct file_operations proc_pagemap_operations = {
 int reclaim_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
 {
-	struct reclaim_param *rp = walk->private;
-	struct vm_area_struct *vma = rp->vma;
+	struct vm_area_struct *vma = walk->private;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
 	LIST_HEAD(page_list);
 	int isolated;
-	int reclaimed;
 	split_huge_pmd(vma, addr, pmd);
-	if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim)
+	if (pmd_trans_unstable(pmd))
 		return 0;
 cont:
 	isolated = 0;
@@ -1895,22 +1893,16 @@ cont:
 		inc_node_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
 		isolated++;
-		rp->nr_scanned++;
-		if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim)
+		if (isolated >= SWAP_CLUSTER_MAX)
 			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
-	reclaimed = reclaim_pages_from_list(&page_list, vma);
-	rp->nr_reclaimed += reclaimed;
-	rp->nr_to_reclaim -= reclaimed;
-	if (rp->nr_to_reclaim < 0)
-		rp->nr_to_reclaim = 0;
-	if (rp->nr_to_reclaim && (addr != end))
+	reclaim_pages_from_list(&page_list, vma);
+	if (addr != end)
 		goto cont;
 	cond_resched();
-	return (rp->nr_to_reclaim == 0) ? -EPIPE : 0;
+	return 0;
 }
 enum reclaim_type {
@@ -1920,50 +1912,6 @@ enum reclaim_type {
 	RECLAIM_RANGE,
 };
-struct reclaim_param reclaim_task_anon(struct task_struct *task,
-		int nr_to_reclaim)
-{
-	struct mm_struct *mm;
-	struct vm_area_struct *vma;
-	struct mm_walk reclaim_walk = {};
-	struct reclaim_param rp = {
-		.nr_to_reclaim = nr_to_reclaim,
-	};
-	get_task_struct(task);
-	mm = get_task_mm(task);
-	if (!mm)
-		goto out;
-	reclaim_walk.mm = mm;
-	reclaim_walk.pmd_entry = reclaim_pte_range;
-	reclaim_walk.private = &rp;
-	down_read(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (is_vm_hugetlb_page(vma))
-			continue;
-		if (vma->vm_file)
-			continue;
-		if (!rp.nr_to_reclaim)
-			break;
-		rp.vma = vma;
-		walk_page_range(vma->vm_start, vma->vm_end,
-				&reclaim_walk);
-	}
-	flush_tlb_mm(mm);
-	up_read(&mm->mmap_sem);
-	mmput(mm);
-out:
-	put_task_struct(task);
-	return rp;
-}
 static ssize_t reclaim_write(struct file *file, const char __user *buf,
 		size_t count, loff_t *ppos)
 {
@@ -1976,7 +1924,6 @@ static ssize_t reclaim_write(struct file *file, const char __user *buf,
 	struct mm_walk reclaim_walk = {};
 	unsigned long start = 0;
 	unsigned long end = 0;
-	struct reclaim_param rp;
 	int ret;
 	memset(buffer, 0, sizeof(buffer));
@@ -2040,10 +1987,6 @@ static ssize_t reclaim_write(struct file *file, const char __user *buf,
 	reclaim_walk.mm = mm;
 	reclaim_walk.pmd_entry = reclaim_pte_range;
-	rp.nr_to_reclaim = INT_MAX;
-	rp.nr_reclaimed = 0;
-	reclaim_walk.private = &rp;
 	down_read(&mm->mmap_sem);
 	if (type == RECLAIM_RANGE) {
 		vma = find_vma(mm, start);
@@ -2055,7 +1998,7 @@ static ssize_t reclaim_write(struct file *file, const char __user *buf,
 		if (is_vm_hugetlb_page(vma))
 			continue;
-		rp.vma = vma;
+		reclaim_walk.private = vma;
 		ret = walk_page_range(max(vma->vm_start, start),
 				min(vma->vm_end, end),
 				&reclaim_walk);
@@ -2077,7 +2020,7 @@ static ssize_t reclaim_write(struct file *file, const char __user *buf,
 		if (type == RECLAIM_FILE && !vma->vm_file)
 			continue;
-		rp.vma = vma;
+		reclaim_walk.private = vma;
 		ret = walk_page_range(vma->vm_start, vma->vm_end,
 				&reclaim_walk);
 		if (ret)

View File

@@ -2869,21 +2869,5 @@ static inline void setup_nr_node_ids(void) {}
 extern int want_old_faultaround_pte;
-#ifdef CONFIG_PROCESS_RECLAIM
-struct reclaim_param {
-	struct vm_area_struct *vma;
-	/* Number of pages scanned */
-	int nr_scanned;
-	/* max pages to reclaim */
-	int nr_to_reclaim;
-	/* pages reclaimed */
-	int nr_reclaimed;
-};
-extern struct reclaim_param reclaim_task_anon(struct task_struct *task,
-		int nr_to_reclaim);
-extern int reclaim_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, struct mm_walk *walk);
-#endif
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */

View File

@@ -1,85 +0,0 @@
/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM process_reclaim
#if !defined(_TRACE_EVENT_PROCESSRECLAIM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EVENT_PROCESSRECLAIM_H
#include <linux/tracepoint.h>
#include <linux/types.h>
#include <linux/sched.h>
TRACE_EVENT(process_reclaim,
TP_PROTO(int tasksize,
short oom_score_adj,
int nr_scanned, int nr_reclaimed,
int per_swap_size, int total_sz,
int nr_to_reclaim),
TP_ARGS(tasksize, oom_score_adj, nr_scanned,
nr_reclaimed, per_swap_size,
total_sz, nr_to_reclaim),
TP_STRUCT__entry(
__field(int, tasksize)
__field(short, oom_score_adj)
__field(int, nr_scanned)
__field(int, nr_reclaimed)
__field(int, per_swap_size)
__field(int, total_sz)
__field(int, nr_to_reclaim)
),
TP_fast_assign(
__entry->tasksize = tasksize;
__entry->oom_score_adj = oom_score_adj;
__entry->nr_scanned = nr_scanned;
__entry->nr_reclaimed = nr_reclaimed;
__entry->per_swap_size = per_swap_size;
__entry->total_sz = total_sz;
__entry->nr_to_reclaim = nr_to_reclaim;
),
TP_printk("%d, %hd, %d, %d, %d, %d, %d",
__entry->tasksize, __entry->oom_score_adj,
__entry->nr_scanned, __entry->nr_reclaimed,
__entry->per_swap_size, __entry->total_sz,
__entry->nr_to_reclaim)
);
TRACE_EVENT(process_reclaim_eff,
TP_PROTO(int efficiency, int reclaim_avg_efficiency),
TP_ARGS(efficiency, reclaim_avg_efficiency),
TP_STRUCT__entry(
__field(int, efficiency)
__field(int, reclaim_avg_efficiency)
),
TP_fast_assign(
__entry->efficiency = efficiency;
__entry->reclaim_avg_efficiency = reclaim_avg_efficiency;
),
TP_printk("%d, %d", __entry->efficiency,
__entry->reclaim_avg_efficiency)
);
#endif
#include <trace/define_trace.h>

View File

@@ -104,4 +104,3 @@ obj-$(CONFIG_DAMON) += damon/
 obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_HMM) += hmm.o
-obj-$(CONFIG_PROCESS_RECLAIM) += process_reclaim.o

View File

@@ -1,251 +0,0 @@
/*
* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/sort.h>
#include <linux/oom.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/vmpressure.h>
#define CREATE_TRACE_POINTS
#include <trace/events/process_reclaim.h>
#define MAX_SWAP_TASKS SWAP_CLUSTER_MAX
static void swap_fn(struct work_struct *work);
DECLARE_WORK(swap_work, swap_fn);
/* User knob to enable/disable process reclaim feature */
static int enable_process_reclaim;
module_param_named(enable_process_reclaim, enable_process_reclaim, int, 0644);
/* The max number of pages tried to be reclaimed in a single run */
int per_swap_size = SWAP_CLUSTER_MAX * 256;
module_param_named(per_swap_size, per_swap_size, int, 0644);
int reclaim_avg_efficiency;
module_param_named(reclaim_avg_efficiency, reclaim_avg_efficiency, int, 0444);
/* The vmpressure region where process reclaim operates */
static unsigned long pressure_min = 50;
static unsigned long pressure_max = 90;
module_param_named(pressure_min, pressure_min, ulong, 0644);
module_param_named(pressure_max, pressure_max, ulong, 0644);
/*
* Scheduling process reclaim workqueue unecessarily
* when the reclaim efficiency is low does not make
* sense. We try to detect a drop in efficiency and
* disable reclaim for a time period. This period and the
* period for which we monitor a drop in efficiency is
* defined by swap_eff_win. swap_opt_eff is the optimal
* efficincy used as theshold for this.
*/
static int swap_eff_win = 2;
module_param_named(swap_eff_win, swap_eff_win, int, 0644);
static int swap_opt_eff = 50;
module_param_named(swap_opt_eff, swap_opt_eff, int, 0644);
static atomic_t skip_reclaim = ATOMIC_INIT(0);
/* Not atomic since only a single instance of swap_fn run at a time */
static int monitor_eff;
struct selected_task {
struct task_struct *p;
int tasksize;
short oom_score_adj;
};
int selected_cmp(const void *a, const void *b)
{
const struct selected_task *x = a;
const struct selected_task *y = b;
int ret;
ret = x->tasksize < y->tasksize ? -1 : 1;
return ret;
}
static int test_task_flag(struct task_struct *p, int flag)
{
struct task_struct *t = p;
rcu_read_lock();
for_each_thread(p, t) {
task_lock(t);
if (test_tsk_thread_flag(t, flag)) {
task_unlock(t);
rcu_read_unlock();
return 1;
}
task_unlock(t);
}
rcu_read_unlock();
return 0;
}
static void swap_fn(struct work_struct *work)
{
struct task_struct *tsk;
struct reclaim_param rp;
/* Pick the best MAX_SWAP_TASKS tasks in terms of anon size */
struct selected_task selected[MAX_SWAP_TASKS] = {{0, 0, 0},};
int si = 0;
int i;
int tasksize;
int total_sz = 0;
short min_score_adj = 360;
int total_scan = 0;
int total_reclaimed = 0;
int nr_to_reclaim;
int efficiency;
rcu_read_lock();
for_each_process(tsk) {
struct task_struct *p;
short oom_score_adj;
if (tsk->flags & PF_KTHREAD)
continue;
if (test_task_flag(tsk, TIF_MEMDIE))
continue;
p = find_lock_task_mm(tsk);
if (!p)
continue;
oom_score_adj = p->signal->oom_score_adj;
if (oom_score_adj < min_score_adj) {
task_unlock(p);
continue;
}
tasksize = get_mm_counter(p->mm, MM_ANONPAGES);
task_unlock(p);
if (tasksize <= 0)
continue;
if (si == MAX_SWAP_TASKS) {
sort(&selected[0], MAX_SWAP_TASKS,
sizeof(struct selected_task),
&selected_cmp, NULL);
if (tasksize < selected[0].tasksize)
continue;
selected[0].p = p;
selected[0].oom_score_adj = oom_score_adj;
selected[0].tasksize = tasksize;
} else {
selected[si].p = p;
selected[si].oom_score_adj = oom_score_adj;
selected[si].tasksize = tasksize;
si++;
}
}
for (i = 0; i < si; i++)
total_sz += selected[i].tasksize;
/* Skip reclaim if total size is too less */
if (total_sz < SWAP_CLUSTER_MAX) {
rcu_read_unlock();
return;
}
for (i = 0; i < si; i++)
get_task_struct(selected[i].p);
rcu_read_unlock();
while (si--) {
nr_to_reclaim =
(selected[si].tasksize * per_swap_size) / total_sz;
/* scan atleast a page */
if (!nr_to_reclaim)
nr_to_reclaim = 1;
rp = reclaim_task_anon(selected[si].p, nr_to_reclaim);
trace_process_reclaim(selected[si].tasksize,
selected[si].oom_score_adj, rp.nr_scanned,
rp.nr_reclaimed, per_swap_size, total_sz,
nr_to_reclaim);
total_scan += rp.nr_scanned;
total_reclaimed += rp.nr_reclaimed;
put_task_struct(selected[si].p);
}
if (total_scan) {
efficiency = (total_reclaimed * 100) / total_scan;
if (efficiency < swap_opt_eff) {
if (++monitor_eff == swap_eff_win) {
atomic_set(&skip_reclaim, swap_eff_win);
monitor_eff = 0;
}
} else {
monitor_eff = 0;
}
reclaim_avg_efficiency =
(efficiency + reclaim_avg_efficiency) / 2;
trace_process_reclaim_eff(efficiency, reclaim_avg_efficiency);
}
}
static int vmpressure_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
unsigned long pressure = action;
if (!enable_process_reclaim)
return 0;
if (!current_is_kswapd())
return 0;
if (atomic_dec_if_positive(&skip_reclaim) >= 0)
return 0;
if ((pressure >= pressure_min) && (pressure < pressure_max))
if (!work_pending(&swap_work))
queue_work(system_unbound_wq, &swap_work);
return 0;
}
static struct notifier_block vmpr_nb = {
.notifier_call = vmpressure_notifier,
};
static int __init process_reclaim_init(void)
{
vmpressure_notifier_register(&vmpr_nb);
return 0;
}
static void __exit process_reclaim_exit(void)
{
vmpressure_notifier_unregister(&vmpr_nb);
}
module_init(process_reclaim_init);
module_exit(process_reclaim_exit);