mirror of
https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
powerpc: Speed up clear_page by unrolling it
Unroll clear_page 8 times. A simple microbenchmark which allocates and frees a zeroed page: for (i = 0; i < iterations; i++) { unsigned long p = __get_free_page(GFP_KERNEL | __GFP_ZERO); free_page(p); } improves 20% on POWER8. This assumes cacheline sizes won't grow beyond 512 bytes or page sizes won't drop below 1kB, which is unlikely, but we could add a runtime check during early init if it makes people nervous. Michael found that some versions of gcc produce quite bad code (all multiplies), so we give gcc a hand by using shifts and adds. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
2013add4ce
commit
e35735b9a5
@ -42,20 +42,40 @@
|
||||
|
||||
typedef unsigned long pte_basic_t;
|
||||
|
||||
static __inline__ void clear_page(void *addr)
|
||||
static inline void clear_page(void *addr)
|
||||
{
|
||||
unsigned long lines, line_size;
|
||||
unsigned long iterations;
|
||||
unsigned long onex, twox, fourx, eightx;
|
||||
|
||||
line_size = ppc64_caches.dline_size;
|
||||
lines = ppc64_caches.dlines_per_page;
|
||||
iterations = ppc64_caches.dlines_per_page / 8;
|
||||
|
||||
__asm__ __volatile__(
|
||||
/*
|
||||
* Some versions of gcc use multiply instructions to
|
||||
* calculate the offsets so let's give it a hand to
|
||||
* do better.
|
||||
*/
|
||||
onex = ppc64_caches.dline_size;
|
||||
twox = onex << 1;
|
||||
fourx = onex << 2;
|
||||
eightx = onex << 3;
|
||||
|
||||
asm volatile(
|
||||
"mtctr %1 # clear_page\n\
|
||||
1: dcbz 0,%0\n\
|
||||
add %0,%0,%3\n\
|
||||
.balign 16\n\
|
||||
1: dcbz 0,%0\n\
|
||||
dcbz %3,%0\n\
|
||||
dcbz %4,%0\n\
|
||||
dcbz %5,%0\n\
|
||||
dcbz %6,%0\n\
|
||||
dcbz %7,%0\n\
|
||||
dcbz %8,%0\n\
|
||||
dcbz %9,%0\n\
|
||||
add %0,%0,%10\n\
|
||||
bdnz+ 1b"
|
||||
: "=r" (addr)
|
||||
: "r" (lines), "0" (addr), "r" (line_size)
|
||||
: "=&r" (addr)
|
||||
: "r" (iterations), "0" (addr), "b" (onex), "b" (twox),
|
||||
"b" (twox+onex), "b" (fourx), "b" (fourx+onex),
|
||||
"b" (twox+fourx), "b" (eightx-onex), "r" (eightx)
|
||||
: "ctr", "memory");
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user