msm-4.14/kernel/sys_ni.c
Edgar Arriaga García e4493a4b48
BACKPORT: mm/madvise: introduce process_madvise() syscall: an external memory hinting API
There is usecase that System Management Software(SMS) want to give
a memory hint like MADV_[COLD|PAGEEOUT] to other processes and
in the case of Android, it is the ActivityManagerService.

It's similar in spirit to madvise(MADV_WONTNEED), but the information
required to make the reclaim decision is not known to the app. Instead,
it is known to the centralized userspace daemon(ActivityManagerService),
and that daemon must be able to initiate reclaim on its own without
any app involvement.

To solve the issue, this patch introduces a new syscall process_madvise(2).
It uses pidfd of an external process to give the hint.

 int process_madvise(int pidfd, void *addr, size_t length, int advise,
			unsigned long flag);

Since it could affect other process's address range, only privileged
process(CAP_SYS_PTRACE) or something else(e.g., being the same UID)
gives it the right to ptrace the process could use it successfully.
The flag argument is reserved for future use if we need to extend the
API.

I think supporting all hints madvise has/will supported/support to
process_madvise is rather risky. Because we are not sure all hints make
sense from external process and implementation for the hint may rely on
the caller being in the current context so it could be error-prone.
Thus, I just limited hints as MADV_[COLD|PAGEOUT] in this patch.

If someone want to add other hints, we could hear hear the usecase and
review it for each hint. It's safer for maintenance rather than
introducing a buggy syscall but hard to fix it later.

[1] https://developer.android.com/topic/performance/memory"
[2] process_getinfo for getting the cookie which is updated whenever
    vma of process address layout are changed - Daniel Colascione
- https://lore.kernel.org/lkml/20190520035254.57579-1-minchan@kernel.org/T/#m7694416fd179b2066a2c62b5b139b14e3894e224
[3] anonymous fd which is used for the object(i.e., address range)
    validation - Michal Hocko
- https://lore.kernel.org/lkml/20200120112722.GY18451@dhcp22.suse.cz/

Link: http://lkml.kernel.org/r/20200302193630.68771-3-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Christian Brauner <christian@brauner.io>
Cc: Daniel Colascione <dancol@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Dias <joaodias@google.com>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksandr Natalenko <oleksandr@redhat.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: SeongJae Park <sj38.park@gmail.com>
Cc: SeongJae Park <sjpark@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Tim Murray <timmurray@google.com>
Cc: <linux-man@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

(cherry picked from commit ecb8ac8b1f146915aa6b96449b66dd48984caacc)

Conflicts:
    arch/alpha/kernel/syscalls/syscall.tbl
    arch/ia64/kernel/syscalls/syscall.tbl
    arch/m68k/kernel/syscalls/syscall.tbl
    arch/microblaze/kernel/syscalls/syscall.tbl
    arch/mips/kernel/syscalls/syscall_n32.tbl
    arch/mips/kernel/syscalls/syscall_n64.tbl
    arch/parisc/kernel/syscalls/syscall.tbl
    arch/powerpc/kernel/syscalls/syscall.tbl
    arch/s390/kernel/syscalls/syscall.tbl
    arch/sh/kernel/syscalls/syscall.tbl
    arch/sparc/kernel/syscalls/syscall.tbl
    mm/madvise.c

1. __NR_compat_syscalls in arch/arm64/include/asm/unistd.h modified to match latest version to avoid
clobbering old number.
2. Dropped syscall.tbl, syscall_n32, syscall_n64 files for architectures
not present in current kernel.
3. __NR_process_madvise in arch/arm64/include/asm/unistd32.h modified to
match latest mm tree.
4. Added include for uio.h lib which is needed for UIO_FASTIOV and iovec

Bug: 153444106
Test: Built kernel
Signed-off-by: Edgar Arriaga García <edgararriaga@google.com>

Change-Id: Icfff940abebcf290c3111239989ed40a407cf2a6
Signed-off-by: azrim <mirzaspc@gmail.com>
2022-07-31 09:36:45 +00:00

263 lines
7.3 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/linkage.h>
#include <linux/errno.h>
#include <asm/unistd.h>
/* we can't #include <linux/syscalls.h> here,
but tell gcc to not warn with -Wmissing-prototypes */
asmlinkage long sys_ni_syscall(void);
/*
* Non-implemented system calls get redirected here.
*/
asmlinkage long sys_ni_syscall(void)
{
return -ENOSYS;
}
cond_syscall(sys_quotactl);
cond_syscall(sys32_quotactl);
cond_syscall(sys_acct);
cond_syscall(sys_lookup_dcookie);
cond_syscall(compat_sys_lookup_dcookie);
cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
cond_syscall(sys_kexec_file_load);
cond_syscall(sys_init_module);
cond_syscall(sys_finit_module);
cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
cond_syscall(sys_accept4);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
cond_syscall(sys_sendto);
cond_syscall(sys_send);
cond_syscall(sys_recvfrom);
cond_syscall(sys_recv);
cond_syscall(sys_socket);
cond_syscall(sys_setsockopt);
cond_syscall(compat_sys_setsockopt);
cond_syscall(sys_getsockopt);
cond_syscall(compat_sys_getsockopt);
cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg);
cond_syscall(sys_sendmmsg);
cond_syscall(compat_sys_sendmsg);
cond_syscall(compat_sys_sendmmsg);
cond_syscall(sys_recvmsg);
cond_syscall(sys_recvmmsg);
cond_syscall(compat_sys_recvmsg);
cond_syscall(compat_sys_recv);
cond_syscall(compat_sys_recvfrom);
cond_syscall(compat_sys_recvmmsg);
cond_syscall(sys_socketcall);
cond_syscall(sys_futex);
cond_syscall(compat_sys_futex);
cond_syscall(sys_set_robust_list);
cond_syscall(compat_sys_set_robust_list);
cond_syscall(sys_get_robust_list);
cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_create1);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
cond_syscall(sys_epoll_pwait);
cond_syscall(compat_sys_epoll_pwait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
cond_syscall(compat_sys_semtimedop);
cond_syscall(sys_semctl);
cond_syscall(compat_sys_semctl);
cond_syscall(sys_msgget);
cond_syscall(sys_msgsnd);
cond_syscall(compat_sys_msgsnd);
cond_syscall(sys_msgrcv);
cond_syscall(compat_sys_msgrcv);
cond_syscall(sys_msgctl);
cond_syscall(compat_sys_msgctl);
cond_syscall(sys_shmget);
cond_syscall(sys_shmat);
cond_syscall(compat_sys_shmat);
cond_syscall(sys_shmdt);
cond_syscall(sys_shmctl);
cond_syscall(compat_sys_shmctl);
cond_syscall(sys_mq_open);
cond_syscall(sys_mq_unlink);
cond_syscall(sys_mq_timedsend);
cond_syscall(sys_mq_timedreceive);
cond_syscall(sys_mq_notify);
cond_syscall(sys_mq_getsetattr);
cond_syscall(compat_sys_mq_open);
cond_syscall(compat_sys_mq_timedsend);
cond_syscall(compat_sys_mq_timedreceive);
cond_syscall(compat_sys_mq_notify);
cond_syscall(compat_sys_mq_getsetattr);
cond_syscall(sys_mbind);
cond_syscall(sys_get_mempolicy);
cond_syscall(sys_set_mempolicy);
cond_syscall(compat_sys_mbind);
cond_syscall(compat_sys_get_mempolicy);
cond_syscall(compat_sys_set_mempolicy);
cond_syscall(sys_add_key);
cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
cond_syscall(compat_sys_keyctl);
cond_syscall(compat_sys_socketcall);
cond_syscall(sys_inotify_init);
cond_syscall(sys_inotify_init1);
cond_syscall(sys_inotify_add_watch);
cond_syscall(sys_inotify_rm_watch);
cond_syscall(sys_migrate_pages);
cond_syscall(sys_move_pages);
cond_syscall(sys_chown16);
cond_syscall(sys_fchown16);
cond_syscall(sys_getegid16);
cond_syscall(sys_geteuid16);
cond_syscall(sys_getgid16);
cond_syscall(sys_getgroups16);
cond_syscall(sys_getresgid16);
cond_syscall(sys_getresuid16);
cond_syscall(sys_getuid16);
cond_syscall(sys_lchown16);
cond_syscall(sys_setfsgid16);
cond_syscall(sys_setfsuid16);
cond_syscall(sys_setgid16);
cond_syscall(sys_setgroups16);
cond_syscall(sys_setregid16);
cond_syscall(sys_setresgid16);
cond_syscall(sys_setresuid16);
cond_syscall(sys_setreuid16);
cond_syscall(sys_setuid16);
cond_syscall(sys_sgetmask);
cond_syscall(sys_ssetmask);
cond_syscall(sys_vm86old);
cond_syscall(sys_vm86);
cond_syscall(sys_modify_ldt);
cond_syscall(sys_ipc);
cond_syscall(compat_sys_ipc);
cond_syscall(compat_sys_sysctl);
cond_syscall(sys_flock);
cond_syscall(sys_io_setup);
cond_syscall(sys_io_destroy);
cond_syscall(sys_io_submit);
cond_syscall(sys_io_cancel);
cond_syscall(sys_io_getevents);
cond_syscall(compat_sys_io_setup);
cond_syscall(compat_sys_io_submit);
cond_syscall(compat_sys_io_getevents);
cond_syscall(sys_sysfs);
cond_syscall(sys_syslog);
cond_syscall(sys_process_vm_readv);
cond_syscall(sys_process_vm_writev);
cond_syscall(compat_sys_process_vm_readv);
cond_syscall(compat_sys_process_vm_writev);
cond_syscall(sys_uselib);
cond_syscall(sys_fadvise64);
cond_syscall(sys_fadvise64_64);
cond_syscall(sys_madvise);
cond_syscall(sys_setuid);
cond_syscall(sys_setregid);
cond_syscall(sys_setgid);
cond_syscall(sys_setreuid);
cond_syscall(sys_setresuid);
cond_syscall(sys_getresuid);
cond_syscall(sys_setresgid);
cond_syscall(sys_getresgid);
cond_syscall(sys_setgroups);
cond_syscall(sys_getgroups);
cond_syscall(sys_setfsuid);
cond_syscall(sys_setfsgid);
cond_syscall(sys_capget);
cond_syscall(sys_capset);
cond_syscall(sys_copy_file_range);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
cond_syscall(sys_pciconfig_write);
cond_syscall(sys_pciconfig_iobase);
cond_syscall(compat_sys_s390_ipc);
cond_syscall(ppc_rtas);
cond_syscall(sys_spu_run);
cond_syscall(sys_spu_create);
cond_syscall(sys_subpage_prot);
cond_syscall(sys_s390_pci_mmio_read);
cond_syscall(sys_s390_pci_mmio_write);
/* mmu depending weak syscall entries */
cond_syscall(sys_mprotect);
cond_syscall(sys_msync);
cond_syscall(sys_mlock);
cond_syscall(sys_munlock);
cond_syscall(sys_mlockall);
cond_syscall(sys_munlockall);
cond_syscall(sys_mlock2);
cond_syscall(sys_mincore);
cond_syscall(sys_madvise);
cond_syscall(process_madvise);
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
cond_syscall(compat_sys_move_pages);
cond_syscall(compat_sys_migrate_pages);
/* block-layer dependent */
cond_syscall(sys_bdflush);
cond_syscall(sys_ioprio_set);
cond_syscall(sys_ioprio_get);
/* New file descriptors */
cond_syscall(sys_signalfd);
cond_syscall(sys_signalfd4);
cond_syscall(compat_sys_signalfd);
cond_syscall(compat_sys_signalfd4);
cond_syscall(sys_timerfd_create);
cond_syscall(sys_timerfd_settime);
cond_syscall(sys_timerfd_gettime);
cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);
cond_syscall(sys_memfd_create);
cond_syscall(sys_userfaultfd);
/* performance counters: */
cond_syscall(sys_perf_event_open);
/* fanotify! */
cond_syscall(sys_fanotify_init);
cond_syscall(sys_fanotify_mark);
cond_syscall(compat_sys_fanotify_mark);
/* open by handle */
cond_syscall(sys_name_to_handle_at);
cond_syscall(sys_open_by_handle_at);
cond_syscall(compat_sys_open_by_handle_at);
/* compare kernel pointers */
cond_syscall(sys_kcmp);
/* operate on Secure Computing state */
cond_syscall(sys_seccomp);
/* access BPF programs and maps */
cond_syscall(sys_bpf);
/* execveat */
cond_syscall(sys_execveat);
/* membarrier */
cond_syscall(sys_membarrier);
/* memory protection keys */
cond_syscall(sys_pkey_mprotect);
cond_syscall(sys_pkey_alloc);
cond_syscall(sys_pkey_free);