memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved. Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.

This saves a full word for every page in a system with memory cgroups
enabled.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 6b3ae58efc
parent 5564e88ba6
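As a rough standalone illustration of the flags-word encoding this patch introduces (a userspace C sketch, not kernel code): NR_FLAG_BITS and ARRAYID_WIDTH below are hypothetical stand-ins for __NR_PCG_FLAGS and SECTIONS_SHIFT/NODES_SHIFT, but the mask/shift arithmetic mirrors set_page_cgroup_array_id() and page_cgroup_array_id() from the diff.

/*
 * Standalone sketch of the encoding (userspace C, not kernel code).
 * NR_FLAG_BITS and ARRAYID_WIDTH are illustrative stand-ins for
 * __NR_PCG_FLAGS and SECTIONS_SHIFT/NODES_SHIFT from the patch.
 */
#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define NR_FLAG_BITS   7                               /* low bits: PCG_* flags */
#define ARRAYID_WIDTH  10                              /* high bits: array ID */
#define BITS_PER_LONG  (sizeof(unsigned long) * CHAR_BIT)
#define ARRAYID_MASK   ((1UL << ARRAYID_WIDTH) - 1)
#define ARRAYID_SHIFT  (BITS_PER_LONG - ARRAYID_WIDTH) /* ID lives at the top */

static void set_array_id(unsigned long *flags, unsigned long id)
{
        *flags &= ~(ARRAYID_MASK << ARRAYID_SHIFT);     /* clear any old ID */
        *flags |= (id & ARRAYID_MASK) << ARRAYID_SHIFT; /* store the new ID */
}

static unsigned long get_array_id(unsigned long flags)
{
        return (flags >> ARRAYID_SHIFT) & ARRAYID_MASK;
}

int main(void)
{
        unsigned long flags = 1UL << 2;         /* pretend a PCG_* bit is set */

        /* mirrors the patch's #error guard: ID and flag bits must not overlap */
        assert(ARRAYID_WIDTH <= BITS_PER_LONG - NR_FLAG_BITS);

        set_array_id(&flags, 42);
        assert(get_array_id(flags) == 42);      /* ID round-trips */
        assert(flags & (1UL << 2));             /* flag bits survive */
        printf("flags=%#lx id=%lu\n", flags, get_array_id(flags));
        return 0;
}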
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,8 +1,26 @@
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
 
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	PCG_MIGRATION, /* under page migration */
+	/* flags for mem_cgroup and file and I/O status */
+	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+	/* No lock in page_cgroup */
+	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
+	__NR_PCG_FLAGS,
+};
+
+#ifndef __GENERATING_BOUNDS_H
+#include <generated/bounds.h>
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #include <linux/bit_spinlock.h>
+
 /*
  * Page Cgroup can be considered as an extended mem_map.
  * A page_cgroup page is associated with every page descriptor. The
@@ -13,7 +31,6 @@
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct page *page;
 	struct list_head lru;		/* per cgroup LRU list */
 };
 
@@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void)
 #endif
 
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_LOCK,  /* Lock for pc->mem_cgroup and following bits. */
-	PCG_CACHE, /* charged as cache */
-	PCG_USED, /* this object is in use. */
-	PCG_MIGRATION, /* under page migration */
-	/* flags for mem_cgroup and file and I/O status */
-	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
-	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
-};
+struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
@@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }
 
+#ifdef CONFIG_SPARSEMEM
+#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
+#else
+#define PCG_ARRAYID_WIDTH	NODES_SHIFT
+#endif
+
+#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
+#error Not enough space left in pc->flags to store page_cgroup array IDs
+#endif
+
+/* pc->flags: ARRAY-ID | FLAGS */
+
+#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
+
+#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
+/*
+ * Zero the shift count for non-existant fields, to prevent compiler
+ * warnings and ensure references are optimized away.
+ */
+#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
+
+static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
+					    unsigned long id)
+{
+	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
+	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
+}
+
+static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
+{
+	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
@@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }
 
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
 
 #include <linux/swap.h>
 
@@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type)
 	return;
 }
 
-#endif
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+
+#endif /* !__GENERATING_BOUNDS_H */
+
+#endif /* __LINUX_PAGE_CGROUP_H */
diff --git a/kernel/bounds.c b/kernel/bounds.c
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageCgroupUsed(pc)))
 			continue;
 
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 
 		if (unlikely(!PageLRU(page)))
 			continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 
 		ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
 		if (ret == -ENOMEM)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
 	pc->flags = 0;
+	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	pc->page = pfn_to_page(pfn);
 	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	unsigned long pfn;
+	struct page *page;
+	pg_data_t *pgdat;
+
+	pgdat = NODE_DATA(page_cgroup_array_id(pc));
+	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+	page = pfn_to_page(pfn);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
 	struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
 		return -ENOMEM;
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, start_pfn + index);
+		init_page_cgroup(pc, nid);
 	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	struct mem_section *section;
+	struct page *page;
+	unsigned long nr;
+
+	nr = page_cgroup_array_id(pc);
+	section = __nr_to_section(nr);
+	page = pfn_to_page(pc - section->page_cgroup);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-	struct mem_section *section = __pfn_to_section(pfn);
 	struct page_cgroup *base, *pc;
+	struct mem_section *section;
 	unsigned long table_size;
+	unsigned long nr;
 	int nid, index;
 
-	if (!section->page_cgroup) {
-		nid = page_to_nid(pfn_to_page(pfn));
-		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		VM_BUG_ON(!slab_is_available());
-		if (node_state(nid, N_HIGH_MEMORY)) {
-			base = kmalloc_node(table_size,
-					    GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
-			if (!base)
-				base = vmalloc(table_size);
-		}
-		/*
-		 * The value stored in section->page_cgroup is (base - pfn)
-		 * and it does not point to the memory block allocated above,
-		 * causing kmemleak false positives.
-		 */
-		kmemleak_not_leak(base);
-	} else {
-		/*
-		 * We don't have to allocate page_cgroup again, but
-		 * address of memmap may be changed. So, we have to initialize
-		 * again.
-		 */
-		base = section->page_cgroup + pfn;
-		table_size = 0;
-		/* check address of memmap is changed or not. */
-		if (base->page == pfn_to_page(pfn))
-			return 0;
-	}
+	nr = pfn_to_section_nr(pfn);
+	section = __nr_to_section(nr);
+
+	if (section->page_cgroup)
+		return 0;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+	VM_BUG_ON(!slab_is_available());
+	if (node_state(nid, N_HIGH_MEMORY)) {
+		base = kmalloc_node(table_size,
+				    GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
+	} else {
+		base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+		if (!base)
+			base = vmalloc(table_size);
+	}
+	/*
+	 * The value stored in section->page_cgroup is (base - pfn)
+	 * and it does not point to the memory block allocated above,
+	 * causing kmemleak false positives.
+	 */
+	kmemleak_not_leak(base);
 
 	if (!base) {
 		printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 
 	for (index = 0; index < PAGES_PER_SECTION; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, pfn + index);
+		init_page_cgroup(pc, nr);
 	}
 
 	section->page_cgroup = base - pfn;
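The new lookup path replaces the stored pc->page pointer with arithmetic: the array ID in the flags word names the containing node (or section), and the entry's offset within that array maps back to a page frame number. The following userspace C sketch models this under entirely hypothetical names and sizes (struct pc, NODE_SHIFT, PAGES_PER_NODE); it is an illustration of the idea, not the kernel implementation.

/*
 * Userspace model of the lookup (hypothetical names and sizes): each
 * node owns a contiguous array of entries; an entry recovers its pfn
 * from the node ID in its flags plus its offset within that array.
 */
#include <assert.h>

#define NODE_SHIFT     2
#define MAX_NODES      (1UL << NODE_SHIFT)
#define PAGES_PER_NODE 8

struct pc { unsigned long flags; };

static struct pc node_array[MAX_NODES][PAGES_PER_NODE];
static unsigned long node_start_pfn[MAX_NODES] = { 0, 8, 16, 24 };

static void set_node(struct pc *pc, unsigned long nid)
{
        /* node ID goes in the top NODE_SHIFT bits, as in the patch */
        pc->flags |= nid << (sizeof(unsigned long) * 8 - NODE_SHIFT);
}

static unsigned long get_node(const struct pc *pc)
{
        return pc->flags >> (sizeof(unsigned long) * 8 - NODE_SHIFT);
}

/* analogue of lookup_cgroup_page(): ID -> array base, offset -> pfn */
static unsigned long pc_to_pfn(const struct pc *pc)
{
        unsigned long nid = get_node(pc);

        return (pc - node_array[nid]) + node_start_pfn[nid];
}

int main(void)
{
        struct pc *pc = &node_array[2][5];      /* 6th entry of node 2 */

        set_node(pc, 2);
        assert(pc_to_pfn(pc) == node_start_pfn[2] + 5);
        return 0;
}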