mirror of
https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
I noticed __clear_user high up in a profile of one of my RAID stress tests. The testcase was doing a dd from /dev/zero which ends up calling __clear_user. __clear_user is basically a loop with a single 4 byte store which is horribly slow. We can do much better by aligning the destination and doing 32 bytes of 8 byte stores in a loop. The following testcase was used to verify the patch: http://ozlabs.org/~anton/junkcode/stress_clear_user.c To show the improvement in performance I ran a dd from /dev/zero to /dev/null on a POWER7 box: Before: # dd if=/dev/zero of=/dev/null bs=1M count=10000 10485760000 bytes (10 GB) copied, 3.72379 s, 2.8 GB/s After: # time dd if=/dev/zero of=/dev/null bs=1M count=10000 10485760000 bytes (10 GB) copied, 0.728318 s, 14.4 GB/s Over 5x faster. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
35 lines
954 B
Makefile
35 lines
954 B
Makefile
#
# Makefile for ppc-specific library files.
#

# When CONFIG_PPC_WERROR is enabled, compile this directory and its
# subdirectories with -Werror so warnings fail the build.
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror

# 64-bit kernels compile everything in this directory with -mno-minimal-toc.
ccflags-$(CONFIG_PPC64) := -mno-minimal-toc

# Drop -pg from the compiler flags for these two objects only.
# NOTE(review): presumably this keeps the code-patching and feature-fixup
# paths free of profiling/ftrace instrumentation -- confirm.
CFLAGS_REMOVE_code-patching.o = -pg
CFLAGS_REMOVE_feature-fixups.o = -pg

# Objects built for every configuration.  The checksum object name is
# derived from CONFIG_WORD_SIZE, so the matching checksum_<size>.o is chosen.
obj-y := string.o alloc.o \
         checksum_$(CONFIG_WORD_SIZE).o crtsavres.o

# Extra objects for 32-bit kernels and for configurations with I/O memory.
obj-$(CONFIG_PPC32) += div64.o copy_32.o
obj-$(CONFIG_HAS_IOMEM) += devres.o

# Extra objects for 64-bit kernels.  string.o is intentionally not repeated
# here: it is already part of the unconditional obj-y list above, so listing
# it a second time was redundant.
obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
                       memcpy_64.o usercopy_64.o mem_64.o \
                       checksum_wrappers_64.o hweight_64.o \
                       copyuser_power7.o string_64.o
# sstep.o and ldstfp.o are always built as a pair; enabling any one of the
# three options below pulls both of them in.
emul_objs := sstep.o ldstfp.o

obj-$(CONFIG_XMON) += $(emul_objs)
obj-$(CONFIG_KPROBES) += $(emul_objs)
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += $(emul_objs)

# The objects below are only considered when CONFIG_PPC64 is exactly "y";
# each one additionally depends on its own option (SMP, ALTIVEC).
ifeq ($(CONFIG_PPC64),y)
obj-$(CONFIG_SMP) += locks.o
obj-$(CONFIG_ALTIVEC) += copyuser_power7_vmx.o
endif

# Built only when CONFIG_PPC_LIB_RHEAP is enabled.
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o

# Always built; the feature-fixups self-test is added on top when
# CONFIG_FTR_FIXUP_SELFTEST is enabled.
obj-y += code-patching.o feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o