From d2818a65ad63ed1b8b9cdf2801fda90576aa2c40 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 May 2018 13:57:26 -0700 Subject: [PATCH 01/59] FROMLIST: f2fs: early updates queued for v4.18-rc1 Cherry-picked from: origin/upstream-f2fs-stable-linux-4.14.y 72497b8d5271 ("f2fs: turn down IO priority of discard from background") cfe6e2f69eef ("f2fs: don't split checkpoint in fstrim") 596c97672db2 ("f2fs: issue discard commands proactively in high fs utilization") 0fbb04e9980a ("f2fs: add fsync_mode=nobarrier for non-atomic files") 13e346436f4e ("f2fs: let fstrim issue discard commands in lower priority") Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 1 + Documentation/filesystems/f2fs.txt | 16 +- fs/f2fs/f2fs.h | 13 +- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 246 ++++++++++++------------ fs/f2fs/super.c | 4 + fs/f2fs/sysfs.c | 3 + 7 files changed, 148 insertions(+), 137 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 540553c933b6..372b88f4e706 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -101,6 +101,7 @@ Date: February 2015 Contact: "Jaegeuk Kim" Description: Controls the trimming rate in batch mode. + What: /sys/fs/f2fs//cp_interval Date: October 2015 diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 4b6cf4c5e061..37d8698ca2d6 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -182,13 +182,15 @@ whint_mode=%s Control which write hints are passed down to block passes down hints with its policy. alloc_mode=%s Adjust block allocation policy, which supports "reuse" and "default". -fsync_mode=%s Control the policy of fsync. Currently supports "posix" - and "strict". In "posix" mode, which is default, fsync - will follow POSIX semantics and does a light operation - to improve the filesystem performance. In "strict" mode, - fsync will be heavy and behaves in line with xfs, ext4 - and btrfs, where xfstest generic/342 will pass, but the - performance will regress. +fsync_mode=%s Control the policy of fsync. Currently supports "posix", + "strict", and "nobarrier". In "posix" mode, which is + default, fsync will follow POSIX semantics and does a + light operation to improve the filesystem performance. + In "strict" mode, fsync will be heavy and behaves in line + with xfs, ext4 and btrfs, where xfstest generic/342 will + pass, but the performance will regress. "nobarrier" is + based on "posix", but doesn't issue flush command for + non-atomic files likewise "nobarrier" mount option. test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt context. The fake fscrypt context is used by xfstests. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f32a0c79702f..534872d064bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -176,15 +176,12 @@ enum { #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 -#define DEF_BATCHED_TRIM_SECTIONS 2048 -#define BATCHED_TRIM_SEGMENTS(sbi) \ - (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections)) -#define BATCHED_TRIM_BLOCKS(sbi) \ - (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ +#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ +#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ @@ -694,7 +691,8 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, static inline bool __is_discard_mergeable(struct discard_info *back, struct discard_info *front) { - return back->lstart + back->len == front->lstart; + return (back->lstart + back->len == front->lstart) && + (back->len + front->len < DEF_MAX_DISCARD_LEN); } static inline bool __is_discard_back_mergeable(struct discard_info *cur, @@ -1080,6 +1078,7 @@ enum { enum fsync_mode { FSYNC_MODE_POSIX, /* fsync follows posix semantics */ FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ + FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; #ifdef CONFIG_F2FS_FS_ENCRYPTION @@ -2774,8 +2773,6 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); -void init_discard_policy(struct discard_policy *dpolicy, int discard_type, - unsigned int granularity); void drop_discard_cmd(struct f2fs_sb_info *sbi); void stop_discard_thread(struct f2fs_sb_info *sbi); bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cb231b004e61..ed72fc2cc68d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -306,7 +306,7 @@ sync_nodes: remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - if (!atomic) + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { remove_ino_entry(sbi, ino, UPDATE_INO); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e365e913858..bef74d628f66 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -915,6 +915,39 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, #endif } +static void __init_discard_policy(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + int discard_type, unsigned int granularity) +{ + /* common policy */ + dpolicy->type = discard_type; + dpolicy->sync = true; + dpolicy->granularity = granularity; + + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + + if (discard_type == DPOLICY_BG) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->io_aware = true; + dpolicy->sync = false; + if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { + dpolicy->granularity = 1; + dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; + } + } else if (discard_type == DPOLICY_FORCE) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_FSTRIM) { + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->io_aware = false; + } +} + + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, @@ -1130,68 +1163,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, - struct discard_policy *dpolicy, - unsigned int start, unsigned int end) -{ - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct discard_cmd *prev_dc = NULL, *next_dc = NULL; - struct rb_node **insert_p = NULL, *insert_parent = NULL; - struct discard_cmd *dc; - struct blk_plug plug; - int issued; - -next: - issued = 0; - - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); - - dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, - NULL, start, - (struct rb_entry **)&prev_dc, - (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true); - if (!dc) - dc = next_dc; - - blk_start_plug(&plug); - - while (dc && dc->lstart <= end) { - struct rb_node *node; - - if (dc->len < dpolicy->granularity) - goto skip; - - if (dc->state != D_PREP) { - list_move_tail(&dc->list, &dcc->fstrim_list); - goto skip; - } - - __submit_discard_cmd(sbi, dpolicy, dc); - - if (++issued >= dpolicy->max_requests) { - start = dc->lstart + dc->len; - - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); - - schedule(); - - goto next; - } -skip: - node = rb_next(&dc->rb_node); - dc = rb_entry_safe(node, struct discard_cmd, rb_node); - - if (fatal_signal_pending(current)) - break; - } - - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); -} - static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { @@ -1332,7 +1303,18 @@ next: static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy) { - __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); + struct discard_policy dp; + + if (dpolicy) { + __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); + return; + } + + /* wait all */ + __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1); + __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); + __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1); + __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1377,11 +1359,13 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) struct discard_policy dpolicy; bool dropped; - init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, + dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); - __wait_all_discard_cmd(sbi, &dpolicy); + /* just to make sure there is no pending discard commands */ + __wait_all_discard_cmd(sbi, NULL); return dropped; } @@ -1397,7 +1381,7 @@ static int issue_discard_thread(void *data) set_freezable(); do { - init_discard_policy(&dpolicy, DPOLICY_BG, + __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, dcc->discard_granularity); wait_event_interruptible_timeout(*q, @@ -1415,7 +1399,7 @@ static int issue_discard_thread(void *data) dcc->discard_wake = 0; if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1708,32 +1692,6 @@ skip: wake_up_discard_thread(sbi, false); } -void init_discard_policy(struct discard_policy *dpolicy, - int discard_type, unsigned int granularity) -{ - /* common policy */ - dpolicy->type = discard_type; - dpolicy->sync = true; - dpolicy->granularity = granularity; - - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; - - if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; - } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = false; - } else if (discard_type == DPOLICY_FSTRIM) { - dpolicy->io_aware = false; - } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->io_aware = false; - } -} - static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -2373,11 +2331,72 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) return has_candidate; } +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + unsigned int start, unsigned int end) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + + if (dc->len < dpolicy->granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + __submit_discard_cmd(sbi, dpolicy, dc); + + if (++issued >= dpolicy->max_requests) { + start = dc->lstart + dc->len; + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + __wait_all_discard_cmd(sbi, NULL); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto next; + } +skip: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; - unsigned int start_segno, end_segno, cur_segno; + unsigned int start_segno, end_segno; block_t start_block, end_block; struct cp_control cpc; struct discard_policy dpolicy; @@ -2403,40 +2422,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); + cpc.trim_start = start_segno; + cpc.trim_end = end_segno; - /* do checkpoint to issue discard commands safely */ - for (cur_segno = start_segno; cur_segno <= end_segno; - cur_segno = cpc.trim_end + 1) { - cpc.trim_start = cur_segno; + if (sbi->discard_blks == 0) + goto out; - if (sbi->discard_blks == 0) - break; - else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) - cpc.trim_end = end_segno; - else - cpc.trim_end = min_t(unsigned int, - rounddown(cur_segno + - BATCHED_TRIM_SEGMENTS(sbi), - sbi->segs_per_sec) - 1, end_segno); - - mutex_lock(&sbi->gc_mutex); - err = write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); - if (err) - break; - - schedule(); - } + mutex_lock(&sbi->gc_mutex); + err = write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + if (err) + goto out; start_block = START_BLOCK(sbi, start_segno); - end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); + end_block = START_BLOCK(sbi, end_segno + 1); - init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); trimmed = __wait_discard_cmd_range(sbi, &dpolicy, start_block, end_block); -out: range->len = F2FS_BLK_TO_BYTES(trimmed); +out: return err; } @@ -3823,8 +3829,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); - sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; - INIT_LIST_HEAD(&sm_info->sit_entry_set); init_rwsem(&sm_info->curseg_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7cbddecdf41f..2b79c1a7a2f2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -740,6 +740,10 @@ static int parse_options(struct super_block *sb, char *options) } else if (strlen(name) == 6 && !strncmp(name, "strict", 6)) { F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; + } else if (strlen(name) == 9 && + !strncmp(name, "nobarrier", 9)) { + F2FS_OPTION(sbi).fsync_mode = + FSYNC_MODE_NOBARRIER; } else { kfree(name); return -EINVAL; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index f33a56d6e6dd..2c53de9251be 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -245,6 +245,9 @@ out: return count; } + if (!strcmp(a->attr.name, "trim_sections")) + return -EINVAL; + *ui = t; if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) From b62ab25d1275a38c2758a528d9cf2ae34398a992 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 May 2018 17:58:13 +0200 Subject: [PATCH 02/59] Revert "pinctrl: msm: Use dynamic GPIO numbering" This reverts commit bd36ea57d6d58041180c4f7d2c2bf5194c26845d which is commit a7aa75a2a7dba32594291a71c3704000a2fd7089 upstream. There's been too many complaints about this. Personally I think it's going to blow up when people hit this in mainline, but hey, it's not my systems. At least we don't have to backport the mess to the stable kernels to give them some more life to live unscathed :) Reported-by: Timur Tabi Reported-by: Sebastian Gottschall Cc: Bjorn Andersson Cc: Linus Walleij Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-msm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 19cd357bb464..ff491da64dab 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -818,7 +818,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return -EINVAL; chip = &pctrl->chip; - chip->base = -1; + chip->base = 0; chip->ngpio = ngpio; chip->label = dev_name(pctrl->dev); chip->parent = pctrl->dev; From 1ac7f5b6dab54d515220c7d7d6a29d87ed5d465b Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Thu, 31 May 2018 13:36:29 -0700 Subject: [PATCH 03/59] ANDROID: Update x86_64_cuttlefish_defconfig Merge with the configs from kernel/configs.git added recently. This should fix ipsec VPN functionality. Bug: 80540078 Change-Id: I9cc99f5e34d2809670fe2fc0df121610657f6769 Signed-off-by: Alistair Strachan --- arch/x86/configs/x86_64_cuttlefish_defconfig | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 90be23b26f21..1c6247dd06d9 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -32,6 +32,7 @@ CONFIG_OPROFILE=y CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y @@ -89,8 +90,8 @@ CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=y CONFIG_INET_ESP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET_DIAG_DESTROY=y CONFIG_TCP_CONG_ADVANCED=y @@ -105,6 +106,7 @@ CONFIG_INET6_AH=y CONFIG_INET6_ESP=y CONFIG_INET6_IPCOMP=y CONFIG_IPV6_MIP6=y +CONFIG_IPV6_VTI=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NETLABEL=y CONFIG_NETFILTER=y @@ -131,6 +133,7 @@ CONFIG_NETFILTER_XT_TARGET_TPROXY=y CONFIG_NETFILTER_XT_TARGET_TRACE=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y CONFIG_NETFILTER_XT_MATCH_CONNMARK=y @@ -144,14 +147,17 @@ CONFIG_NETFILTER_XT_MATCH_MAC=y CONFIG_NETFILTER_XT_MATCH_MARK=y CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y CONFIG_NETFILTER_XT_MATCH_QUOTA=y CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_MATCH_STATISTIC=y CONFIG_NETFILTER_XT_MATCH_STRING=y CONFIG_NETFILTER_XT_MATCH_TIME=y CONFIG_NETFILTER_XT_MATCH_U32=y CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_SOCKET_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_MATCH_AH=y CONFIG_IP_NF_MATCH_ECN=y @@ -169,6 +175,7 @@ CONFIG_IP_NF_ARPTABLES=y CONFIG_IP_NF_ARPFILTER=y CONFIG_IP_NF_ARP_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_NF_SOCKET_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y CONFIG_IP6_NF_MATCH_RPFILTER=y @@ -263,6 +270,7 @@ CONFIG_TABLET_USB_GTCO=y CONFIG_TABLET_USB_HANWANG=y CONFIG_TABLET_USB_KBTAB=y CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y CONFIG_INPUT_UINPUT=y CONFIG_INPUT_GPIO=y # CONFIG_SERIO_I8042 is not set @@ -303,7 +311,6 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_HIDRAW=y CONFIG_UHID=y -# CONFIG_HID_GENERIC is not set CONFIG_HID_A4TECH=y CONFIG_HID_ACRUX=y CONFIG_HID_ACRUX_FF=y @@ -423,12 +430,9 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_PANIC_TIMEOUT=5 -# CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y -# CONFIG_KPROBE_EVENTS is not set -# CONFIG_UPROBE_EVENTS is not set CONFIG_IO_DELAY_NONE=y CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y @@ -441,4 +445,5 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_DEV_VIRTIO=y From 1c44ecfe6db247978e0f69c900af3d74c23d6b40 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Thu, 31 May 2018 15:47:36 -0700 Subject: [PATCH 04/59] ANDROID: x86_64_cuttlefish_defconfig: Enable F2FS Bug: 80475502 Change-Id: I061467404f1d4b828ac1b7423db375a35934ce28 Signed-off-by: Alistair Strachan --- arch/x86/configs/x86_64_cuttlefish_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 1c6247dd06d9..26a42b91140b 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -398,6 +398,9 @@ CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_ENCRYPTION=y +CONFIG_F2FS_FS=y +CONFIG_F2FS_FS_SECURITY=y +CONFIG_F2FS_FS_ENCRYPTION=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set From bd5b366a50e8c9d0b6ea69ff0c585512fe8cb40d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 1 Jun 2018 13:57:27 -0700 Subject: [PATCH 05/59] Revert "ANDROID: net: xfrm: check dir value of xfrm_userpolicy_id" This is fixed in a much cleaner way in 7bab09631c2a ("xfrm: policy: check policy direction value"). There is no point to having all of these extra checks when the one will do. This reverts commit 33f17703cceb1258958fb2049fc0b18b6cc8dea6. Bug: 64257838 Change-Id: I9c5862cdf1da9934144d1f785de6d2c3e69d0dd7 Signed-off-by: Nathan Chancellor --- net/xfrm/xfrm_user.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3badd2c753e1..dbfcfefd6d69 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1753,10 +1753,6 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, struct sk_buff *skb; int err; - err = verify_policy_dir(dir); - if (err) - return ERR_PTR(err); - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -2278,10 +2274,6 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct xfrm_encap_tmpl *encap = NULL; - err = verify_policy_dir(pi->dir); - if (err) - return err; - if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2415,11 +2407,6 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, { struct net *net = &init_net; struct sk_buff *skb; - int err; - - err = verify_policy_dir(dir); - if (err) - return err; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap), GFP_ATOMIC); @@ -3089,11 +3076,6 @@ out_free_skb: static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { - int err; - - err = verify_policy_dir(dir); - if (err) - return err; switch (c->event) { case XFRM_MSG_NEWPOLICY: From 019b711f9e6a391e3449beccfae3b910ffe6ea14 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 1 Sep 2011 15:26:50 -0400 Subject: [PATCH 06/59] ANDROID: add extra free kbytes tunable Add a userspace visible knob to tell the VM to keep an extra amount of memory free, by increasing the gap between each zone's min and low watermarks. This is useful for realtime applications that call system calls and have a bound on the number of allocations that happen in any short time period. In this application, extra_free_kbytes would be left at an amount equal to or larger than than the maximum number of allocations that happen in any burst. It may also be useful to reduce the memory use of virtual machines (temporarily?), in a way that does not cause memory fragmentation like ballooning does. [ccross] Revived for use on old kernels where no other solution exists. The tunable will be removed on kernels that do better at avoiding direct reclaim. [surenb] Will be reverted as soon as Android framework is reworked to use upstream-supported watermark_scale_factor instead of extra_free_kbytes. Bug: 86445363 Change-Id: I765a42be8e964bfd3e2886d1ca85a29d60c3bb3e Signed-off-by: Rik van Riel Signed-off-by: Colin Cross Signed-off-by: Suren Baghdasaryan --- Documentation/sysctl/vm.txt | 16 ++++++++++++++++ kernel/sysctl.c | 9 +++++++++ mm/page_alloc.c | 34 ++++++++++++++++++++++++++-------- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 9baf66a9ef4e..1d1f2cb5abc8 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -30,6 +30,7 @@ Currently, these files are in /proc/sys/vm: - dirty_writeback_centisecs - drop_caches - extfrag_threshold +- extra_free_kbytes - hugepages_treat_as_movable - hugetlb_shm_group - laptop_mode @@ -260,6 +261,21 @@ any throttling. ============================================================== +extra_free_kbytes + +This parameter tells the VM to keep extra free memory between the threshold +where background reclaim (kswapd) kicks in, and the threshold where direct +reclaim (by allocating processes) kicks in. + +This is useful for workloads that require low latency memory allocations +and have a bounded burstiness in memory allocations, for example a +realtime application that receives and transmits network traffic +(causing in-kernel memory allocations) with a maximum total message burst +size of 200MB may need 200MB of extra free memory to avoid direct reclaim +related latencies. + +============================================================== + hugepages_treat_as_movable This parameter controls whether we can allocate hugepages from ZONE_MOVABLE diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d04f697c7197..f9291b0effc2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -105,6 +105,7 @@ extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif extern int pid_max; +extern int extra_free_kbytes; extern int pid_max_min, pid_max_max; extern int percpu_pagelist_fraction; extern int latencytop_enabled; @@ -1489,6 +1490,14 @@ static struct ctl_table vm_table[] = { .extra1 = &one, .extra2 = &one_thousand, }, + { + .procname = "extra_free_kbytes", + .data = &extra_free_kbytes, + .maxlen = sizeof(extra_free_kbytes), + .mode = 0644, + .proc_handler = min_free_kbytes_sysctl_handler, + .extra1 = &zero, + }, { .procname = "percpu_pagelist_fraction", .data = &percpu_pagelist_fraction, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1d7693c35424..406caa653809 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -258,10 +258,22 @@ compound_page_dtor * const compound_page_dtors[] = { #endif }; +/* + * Try to keep at least this much lowmem free. Do not allow normal + * allocations below this point, only high priority ones. Automatically + * tuned according to the amount of memory in the system. + */ int min_free_kbytes = 1024; int user_min_free_kbytes = -1; int watermark_scale_factor = 10; +/* + * Extra memory for the system to try freeing. Used to temporarily + * free memory, to make space for new workloads. Anyone can allocate + * down to the min watermarks controlled by min_free_kbytes above. + */ +int extra_free_kbytes = 0; + static unsigned long __meminitdata nr_kernel_pages; static unsigned long __meminitdata nr_all_pages; static unsigned long __meminitdata dma_reserve; @@ -6892,6 +6904,7 @@ static void setup_per_zone_lowmem_reserve(void) static void __setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); + unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; struct zone *zone; unsigned long flags; @@ -6903,11 +6916,14 @@ static void __setup_per_zone_wmarks(void) } for_each_zone(zone) { - u64 tmp; + u64 min, low; spin_lock_irqsave(&zone->lock, flags); - tmp = (u64)pages_min * zone->managed_pages; - do_div(tmp, lowmem_pages); + min = (u64)pages_min * zone->managed_pages; + do_div(min, lowmem_pages); + low = (u64)pages_low * zone->managed_pages; + do_div(low, vm_total_pages); + if (is_highmem(zone)) { /* * __GFP_HIGH and PF_MEMALLOC allocations usually don't @@ -6928,7 +6944,7 @@ static void __setup_per_zone_wmarks(void) * If it's a lowmem zone, reserve a number of pages * proportionate to the zone's size. */ - zone->watermark[WMARK_MIN] = tmp; + zone->watermark[WMARK_MIN] = min; } /* @@ -6936,12 +6952,14 @@ static void __setup_per_zone_wmarks(void) * scale factor in proportion to available memory, but * ensure a minimum size on small systems. */ - tmp = max_t(u64, tmp >> 2, + min = max_t(u64, min >> 2, mult_frac(zone->managed_pages, watermark_scale_factor, 10000)); - zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; - zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2; + zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + + low + min; + zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + + low + min * 2; spin_unlock_irqrestore(&zone->lock, flags); } @@ -7024,7 +7042,7 @@ core_initcall(init_per_zone_wmark_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so * that we can call two helper functions whenever min_free_kbytes - * changes. + * or extra_free_kbytes changes. */ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) From 6a19487d5a93ee7f3850560b013a037fffd3a582 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 May 2018 22:53:22 -0400 Subject: [PATCH 07/59] fix io_destroy()/aio_complete() race commit 4faa99965e027cc057c5145ce45fa772caa04e8d upstream. If io_destroy() gets to cancelling everything that can be cancelled and gets to kiocb_cancel() calling the function driver has left in ->ki_cancel, it becomes vulnerable to a race with IO completion. At that point req is already taken off the list and aio_complete() does *NOT* spin until we (in free_ioctx_users()) releases ->ctx_lock. As the result, it proceeds to kiocb_free(), freing req just it gets passed to ->ki_cancel(). Fix is simple - remove from the list after the call of kiocb_cancel(). All instances of ->ki_cancel() already have to cope with the being called with iocb still on list - that's what happens in io_cancel(2). Cc: stable@kernel.org Fixes: 0460fef2a921 "aio: use cancellation list lazily" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/aio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 4e23958c2509..3a749c3a92e3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -643,9 +643,8 @@ static void free_ioctx_users(struct percpu_ref *ref) while (!list_empty(&ctx->active_reqs)) { req = list_first_entry(&ctx->active_reqs, struct aio_kiocb, ki_list); - - list_del_init(&req->ki_list); kiocb_cancel(req); + list_del_init(&req->ki_list); } spin_unlock_irq(&ctx->ctx_lock); From b968dd7650c8075de1fb74569a736f1daaa4459c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 1 Jun 2018 16:50:50 -0700 Subject: [PATCH 08/59] mm: fix the NULL mapping case in __isolate_lru_page() commit 145e1a71e090575c74969e3daa8136d1e5b99fc8 upstream. George Boole would have noticed a slight error in 4.16 commit 69d763fc6d3a ("mm: pin address_space before dereferencing it while isolating an LRU page"). Fix it, to match both the comment above it, and the original behaviour. Although anonymous pages are not marked PageDirty at first, we have an old habit of calling SetPageDirty when a page is removed from swap cache: so there's a category of ex-swap pages that are easily migratable, but were inadvertently excluded from compaction's async migration in 4.16. Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1805302014001.12558@eggly.anvils Fixes: 69d763fc6d3a ("mm: pin address_space before dereferencing it while isolating an LRU page") Signed-off-by: Hugh Dickins Acked-by: Minchan Kim Acked-by: Mel Gorman Reported-by: Ivan Kalvachev Cc: "Huang, Ying" Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 1a581468a9cf..be56e2e1931e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1451,7 +1451,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) return ret; mapping = page_mapping(page); - migrate_dirty = mapping && mapping->a_ops->migratepage; + migrate_dirty = !mapping || mapping->a_ops->migratepage; unlock_page(page); if (!migrate_dirty) return ret; From 1bea53df12c47517b6e487e6fed34d0c05d42905 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 9 May 2018 22:39:15 -0500 Subject: [PATCH 09/59] objtool: Support GCC 8's cold subfunctions commit 13810435b9a7014fb92eb715f77da488f3b65b99 upstream. GCC 8 moves a lot of unlikely code out of line to "cold" subfunctions in .text.unlikely. Properly detect the new subfunctions and treat them as extensions of the original functions. This fixes a bunch of warnings like: kernel/cgroup/cgroup.o: warning: objtool: parse_cgroup_root_flags()+0x33: sibling call from callable instruction with modified stack frame kernel/cgroup/cgroup.o: warning: objtool: cgroup_addrm_files()+0x290: sibling call from callable instruction with modified stack frame kernel/cgroup/cgroup.o: warning: objtool: cgroup_apply_control_enable()+0x25b: sibling call from callable instruction with modified stack frame kernel/cgroup/cgroup.o: warning: objtool: rebind_subsystems()+0x325: sibling call from callable instruction with modified stack frame Reported-and-tested-by: damian Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Cc: David Laight Cc: Greg KH Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/0965e7fcfc5f31a276f0c7f298ff770c19b68706.1525923412.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 93 ++++++++++++++++++++++++------------------- tools/objtool/elf.c | 42 ++++++++++++++++++- tools/objtool/elf.h | 2 + 3 files changed, 93 insertions(+), 44 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c8b8b7101c6f..3126760180c4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, return next; } +static struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + struct symbol *func = insn->func; + + if (!func) + return NULL; + + if (&next->list != &file->insn_list && next->func == func) + return next; + + /* Check if we're already in the subfunction: */ + if (func == func->cfunc) + return NULL; + + /* Move to the subfunction: */ + return find_insn(file, func->cfunc->sec, func->cfunc->offset); +} + +#define func_for_each_insn_all(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn; \ + insn = next_insn_same_func(file, insn)) + #define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn && &insn->list != &file->insn_list && \ @@ -148,10 +173,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, if (!strcmp(func->name, global_noreturns[i])) return 1; - if (!func->sec) + if (!func->len) return 0; - func_for_each_insn(file, func, insn) { + insn = find_insn(file, func->sec, func->offset); + if (!insn->func) + return 0; + + func_for_each_insn_all(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) @@ -166,35 +195,24 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, * case, the function's dead-end status depends on whether the target * of the sibling call returns. */ - func_for_each_insn(file, func, insn) { - if (insn->sec != func->sec || - insn->offset >= func->offset + func->len) - break; - + func_for_each_insn_all(file, func, insn) { if (insn->type == INSN_JUMP_UNCONDITIONAL) { struct instruction *dest = insn->jump_dest; - struct symbol *dest_func; if (!dest) /* sibling call to another file */ return 0; - if (dest->sec != func->sec || - dest->offset < func->offset || - dest->offset >= func->offset + func->len) { - /* local sibling call */ - dest_func = find_symbol_by_offset(dest->sec, - dest->offset); - if (!dest_func) - continue; + if (dest->func && dest->func->pfunc != insn->func->pfunc) { + /* local sibling call */ if (recursion == 5) { WARN_FUNC("infinite recursion (objtool bug!)", dest->sec, dest->offset); return -1; } - return __dead_end_function(file, dest_func, + return __dead_end_function(file, dest->func, recursion + 1); } } @@ -421,7 +439,7 @@ static void add_ignores(struct objtool_file *file) if (!ignore_func(file, func)) continue; - func_for_each_insn(file, func, insn) + func_for_each_insn_all(file, func, insn) insn->ignore = true; } } @@ -781,9 +799,8 @@ out: return ret; } -static int add_switch_table(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct rela *table, - struct rela *next_table) +static int add_switch_table(struct objtool_file *file, struct instruction *insn, + struct rela *table, struct rela *next_table) { struct rela *rela = table; struct instruction *alt_insn; @@ -793,18 +810,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, if (rela == next_table) break; - if (rela->sym->sec != insn->sec || - rela->addend <= func->offset || - rela->addend >= func->offset + func->len) + alt_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!alt_insn) break; - alt_insn = find_insn(file, insn->sec, rela->addend); - if (!alt_insn) { - WARN("%s: can't find instruction at %s+0x%x", - file->rodata->rela->name, insn->sec->name, - rela->addend); - return -1; - } + /* Make sure the jmp dest is in the function or subfunction: */ + if (alt_insn->func->pfunc != insn->func->pfunc) + break; alt = malloc(sizeof(*alt)); if (!alt) { @@ -942,7 +954,7 @@ static int add_func_switch_tables(struct objtool_file *file, struct rela *rela, *prev_rela = NULL; int ret; - func_for_each_insn(file, func, insn) { + func_for_each_insn_all(file, func, insn) { if (!last) last = insn; @@ -973,8 +985,7 @@ static int add_func_switch_tables(struct objtool_file *file, * the beginning of another switch table in the same function. */ if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, - rela); + ret = add_switch_table(file, prev_jump, prev_rela, rela); if (ret) return ret; } @@ -984,7 +995,7 @@ static int add_func_switch_tables(struct objtool_file *file, } if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + ret = add_switch_table(file, prev_jump, prev_rela, NULL); if (ret) return ret; } @@ -1748,15 +1759,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, while (1) { next_insn = next_insn_same_sec(file, insn); - - if (file->c_file && func && insn->func && func != insn->func) { + if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; } - if (insn->func) - func = insn->func; + func = insn->func ? insn->func->pfunc : NULL; if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", @@ -1777,7 +1786,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, func, i) { + func_for_each_insn_continue_reverse(file, insn->func, i) { if (i->save) { save_insn = i; break; @@ -1864,7 +1873,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, case INSN_JUMP_UNCONDITIONAL: if (insn->jump_dest && (!func || !insn->jump_dest->func || - func == insn->jump_dest->func)) { + insn->jump_dest->func->pfunc == func)) { ret = validate_branch(file, insn->jump_dest, state); if (ret) @@ -2059,7 +2068,7 @@ static int validate_functions(struct objtool_file *file) for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) + if (func->type != STT_FUNC || func->pfunc != func) continue; insn = find_insn(file, sec, func->offset); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c1c338661699..4e60e105583e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_by_name(struct elf *elf, const char *name) +{ + struct section *sec; + struct symbol *sym; + + list_for_each_entry(sec, &elf->sections, list) + list_for_each_entry(sym, &sec->symbol_list, list) + if (!strcmp(sym->name, name)) + return sym; + + return NULL; +} + struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) { struct symbol *sym; @@ -203,10 +216,11 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { - struct section *symtab; - struct symbol *sym; + struct section *symtab, *sec; + struct symbol *sym, *pfunc; struct list_head *entry, *tmp; int symbols_nr, i; + char *coldstr; symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { @@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf) hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); } + /* Create parent/child links for any cold subfunctions */ + list_for_each_entry(sec, &elf->sections, list) { + list_for_each_entry(sym, &sec->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + sym->pfunc = sym->cfunc = sym; + coldstr = strstr(sym->name, ".cold."); + if (coldstr) { + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; + + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + } + } + } + return 0; err: diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index d86e2ff14466..de5cd2ddded9 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -61,6 +61,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; + struct symbol *pfunc, *cfunc; }; struct rela { @@ -86,6 +87,7 @@ struct elf { struct elf *elf_open(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, From 2c26d5784e71c3a5f456436d893e39c67cbe088d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 10 May 2018 17:48:49 -0500 Subject: [PATCH 10/59] objtool: Support GCC 8 switch tables commit fd35c88b74170d9335530d9abf271d5d73eb5401 upstream. With GCC 8, some issues were found with the objtool switch table detection. 1) In the .rodata section, immediately after the switch table, there can be another object which contains a pointer to the function which had the switch statement. In this case objtool wrongly considers the function pointer to be part of the switch table. Fix it by: a) making sure there are no pointers to the beginning of the function; and b) making sure there are no gaps in the switch table. Only the former was needed, the latter adds additional protection for future optimizations. 2) In find_switch_table(), case 1 and case 2 are missing the check to ensure that the .rodata switch table data is anonymous, i.e. that it isn't already associated with an ELF symbol. Fix it by adding the same find_symbol_containing() check which is used for case 3. This fixes the following warnings with GCC 8: drivers/block/virtio_blk.o: warning: objtool: virtio_queue_rq()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+72 net/ipv6/icmp.o: warning: objtool: icmpv6_rcv()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+64 drivers/usb/core/quirks.o: warning: objtool: quirks_param_set()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+48 drivers/mtd/nand/raw/nand_hynix.o: warning: objtool: hynix_nand_decode_id()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+24 drivers/mtd/nand/raw/nand_samsung.o: warning: objtool: samsung_nand_decode_id()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+32 drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.o: warning: objtool: gk104_top_oneinit()+0x0: stack state mismatch: cfa1=7+8 cfa2=7+64 Reported-by: Arnd Bergmann Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Cc: David Laight Cc: Greg KH Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Thomas Gleixner Cc: damian Link: http://lkml.kernel.org/r/20180510224849.xwi34d6tzheb5wgw@treble Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3126760180c4..063afbdec42d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -805,17 +805,28 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, struct rela *rela = table; struct instruction *alt_insn; struct alternative *alt; + struct symbol *pfunc = insn->func->pfunc; + unsigned int prev_offset = 0; list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { if (rela == next_table) break; + /* Make sure the switch table entries are consecutive: */ + if (prev_offset && rela->offset != prev_offset + 8) + break; + + /* Detect function pointers from contiguous objects: */ + if (rela->sym->sec == pfunc->sec && + rela->addend == pfunc->offset) + break; + alt_insn = find_insn(file, rela->sym->sec, rela->addend); if (!alt_insn) break; /* Make sure the jmp dest is in the function or subfunction: */ - if (alt_insn->func->pfunc != insn->func->pfunc) + if (alt_insn->func->pfunc != pfunc) break; alt = malloc(sizeof(*alt)); @@ -826,6 +837,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn, alt->insn = alt_insn; list_add_tail(&alt->list, &insn->alts); + prev_offset = rela->offset; + } + + if (!prev_offset) { + WARN_FUNC("can't find switch jump table", + insn->sec, insn->offset); + return -1; } return 0; @@ -882,7 +900,9 @@ static struct rela *find_switch_table(struct objtool_file *file, struct instruction *orig_insn = insn; text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) { + if (text_rela && text_rela->sym == file->rodata->sym && + !find_symbol_containing(file->rodata, text_rela->addend)) { + /* case 1 */ rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); From afb5e5c8a125fa0a797311c2a18ccb53b59d1449 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 14 May 2018 08:53:24 -0500 Subject: [PATCH 11/59] objtool: Detect RIP-relative switch table references commit 6f5ec2993b1f39aed12fa6fd56e8dc2272ee8a33 upstream. Typically a switch table can be found by detecting a .rodata access followed an indirect jump: 1969: 4a 8b 0c e5 00 00 00 mov 0x0(,%r12,8),%rcx 1970: 00 196d: R_X86_64_32S .rodata+0x438 1971: e9 00 00 00 00 jmpq 1976 1972: R_X86_64_PC32 __x86_indirect_thunk_rcx-0x4 Randy Dunlap reported a case (seen with GCC 4.8) where the .rodata access uses RIP-relative addressing: 19bd: 48 8b 3d 00 00 00 00 mov 0x0(%rip),%rdi # 19c4 19c0: R_X86_64_PC32 .rodata+0x45c 19c4: e9 00 00 00 00 jmpq 19c9 19c5: R_X86_64_PC32 __x86_indirect_thunk_rdi-0x4 In this case the relocation addend needs to be adjusted accordingly in order to find the location of the switch table. The fix is for case 3 (as described in the comments), but also make the existing case 1 & 2 checks more precise by only adjusting the addend for R_X86_64_PC32 relocations. This fixes the following warnings: drivers/video/fbdev/omap2/omapfb/dss/dispc.o: warning: objtool: dispc_runtime_suspend()+0xbb8: sibling call from callable instruction with modified stack frame drivers/video/fbdev/omap2/omapfb/dss/dispc.o: warning: objtool: dispc_runtime_resume()+0xcc5: sibling call from callable instruction with modified stack frame Reported-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/b6098294fd67afb69af8c47c9883d7a68bf0f8ea.1526305958.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 063afbdec42d..27c79e4d274c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -898,24 +898,24 @@ static struct rela *find_switch_table(struct objtool_file *file, { struct rela *text_rela, *rodata_rela; struct instruction *orig_insn = insn; + unsigned long table_offset; + /* case 1 & 2 */ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); if (text_rela && text_rela->sym == file->rodata->sym && !find_symbol_containing(file->rodata, text_rela->addend)) { - /* case 1 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend); - if (rodata_rela) - return rodata_rela; + table_offset = text_rela->addend; + if (text_rela->type == R_X86_64_PC32) { + /* case 2 */ + table_offset += 4; + file->ignore_unreachables = true; + } - /* case 2 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend + 4); + rodata_rela = find_rela_by_dest(file->rodata, table_offset); if (!rodata_rela) return NULL; - file->ignore_unreachables = true; return rodata_rela; } @@ -949,18 +949,21 @@ static struct rela *find_switch_table(struct objtool_file *file, if (!text_rela || text_rela->sym != file->rodata->sym) continue; + table_offset = text_rela->addend; + if (text_rela->type == R_X86_64_PC32) + table_offset += 4; + /* * Make sure the .rodata address isn't associated with a * symbol. gcc jump tables are anonymous data. */ - if (find_symbol_containing(file->rodata, text_rela->addend)) + if (find_symbol_containing(file->rodata, table_offset)) continue; - rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); - if (!rodata_rela) - continue; - - return rodata_rela; + /* mov [rodata addr], %reg */ + rodata_rela = find_rela_by_dest(file->rodata, table_offset); + if (rodata_rela) + return rodata_rela; } return NULL; From 806a730c0b0beac0a7810735b910301c6da342f3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 May 2018 15:10:34 -0500 Subject: [PATCH 12/59] objtool: Detect RIP-relative switch table references, part 2 commit 7dec80ccbe310fb7e225bf21c48c672bb780ce7b upstream. With the following commit: fd35c88b7417 ("objtool: Support GCC 8 switch tables") I added a "can't find switch jump table" warning, to stop covering up silent failures if add_switch_table() can't find anything. That warning found yet another bug in the objtool switch table detection logic. For cases 1 and 2 (as described in the comments of find_switch_table()), the find_symbol_containing() check doesn't adjust the offset for RIP-relative switch jumps. Incidentally, this bug was already fixed for case 3 with: 6f5ec2993b1f ("objtool: Detect RIP-relative switch table references") However, that commit missed the fix for cases 1 and 2. The different cases are now starting to look more and more alike. So fix the bug by consolidating them into a single case, by checking the original dynamic jump instruction in the case 3 loop. This also simplifies the code and makes it more robust against future switch table detection issues -- of which I'm sure there will be many... Switch table detection has been the most fragile area of objtool, by far. I long for the day when we'll have a GCC plugin for annotating switch tables. Linus asked me to delay such a plugin due to the flakiness of the plugin infrastructure in older versions of GCC, so this rickety code is what we're stuck with for now. At least the code is now a little simpler than it was. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f400541613d45689086329432f3095119ffbc328.1526674218.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 27c79e4d274c..2299eae155ac 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -900,40 +900,19 @@ static struct rela *find_switch_table(struct objtool_file *file, struct instruction *orig_insn = insn; unsigned long table_offset; - /* case 1 & 2 */ - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym && - !find_symbol_containing(file->rodata, text_rela->addend)) { - - table_offset = text_rela->addend; - if (text_rela->type == R_X86_64_PC32) { - /* case 2 */ - table_offset += 4; - file->ignore_unreachables = true; - } - - rodata_rela = find_rela_by_dest(file->rodata, table_offset); - if (!rodata_rela) - return NULL; - - return rodata_rela; - } - - /* case 3 */ /* * Backward search using the @first_jump_src links, these help avoid * much of the 'in between' code. Which avoids us getting confused by * it. */ - for (insn = list_prev_entry(insn, list); - + for (; &insn->list != &file->insn_list && insn->sec == func->sec && insn->offset >= func->offset; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { - if (insn->type == INSN_JUMP_DYNAMIC) + if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) break; /* allow small jumps within the range */ @@ -960,10 +939,18 @@ static struct rela *find_switch_table(struct objtool_file *file, if (find_symbol_containing(file->rodata, table_offset)) continue; - /* mov [rodata addr], %reg */ rodata_rela = find_rela_by_dest(file->rodata, table_offset); - if (rodata_rela) + if (rodata_rela) { + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead + * code behind. + */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; + return rodata_rela; + } } return NULL; From 6bcf3b066c69d00068d1afdcad477d4648ffa51d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 9 May 2018 22:39:14 -0500 Subject: [PATCH 13/59] objtool: Fix "noreturn" detection for recursive sibling calls commit 0afd0d9e0e7879d666c1df2fa1bea4d8716909fe upstream. Objtool has some crude logic for detecting static "noreturn" functions (aka "dead ends"). This is necessary for being able to correctly follow GCC code flow when such functions are called. It's remotely possible for two functions to call each other via sibling calls. If they don't have RET instructions, objtool's noreturn detection logic goes into a recursive loop: drivers/char/ipmi/ipmi_ssif.o: warning: objtool: return_hosed_msg()+0x0: infinite recursion (objtool bug!) drivers/char/ipmi/ipmi_ssif.o: warning: objtool: deliver_recv_msg()+0x0: infinite recursion (objtool bug!) Instead of reporting an error in this case, consider the functions to be non-dead-ends. Reported-and-tested-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Acked-by: Peter Zijlstra (Intel) Cc: Arnd Bergmann Cc: David Laight Cc: Greg KH Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: damian Link: http://lkml.kernel.org/r/7cc156408c5781a1f62085d352ced1fe39fe2f91.1525923412.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2299eae155ac..e128d1c71c30 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -207,9 +207,13 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, /* local sibling call */ if (recursion == 5) { - WARN_FUNC("infinite recursion (objtool bug!)", - dest->sec, dest->offset); - return -1; + /* + * Infinite recursion: two functions + * have sibling calls to each other. + * This is a very rare case. It means + * they aren't dead ends. + */ + return 0; } return __dead_end_function(file, dest->func, From 5df3a1b9f87b62c66fb2fa39c7b22eb32303df36 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 21 Feb 2018 11:19:00 +0100 Subject: [PATCH 14/59] x86/mce/AMD: Carve out SMCA get_block_address() code commit 8a331f4a0863bea758561c921b94b4d28f7c4029 upstream. Carve out the SMCA code in get_block_address() into a separate helper function. No functional change. Signed-off-by: Yazen Ghannam [ Save an indentation level. ] Signed-off-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/20180215210943.11530-4-Yazen.Ghannam@amd.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 57 +++++++++++++++------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 259c75d7a2a0..b44e3e263abd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -429,6 +429,35 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } +static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, + unsigned int block) +{ + u32 low, high; + u32 addr = 0; + + if (smca_get_bank_type(bank) == SMCA_RESERVED) + return addr; + + if (!block) + return MSR_AMD64_SMCA_MCx_MISC(bank); + + /* + * For SMCA enabled processors, BLKPTR field of the first MISC register + * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). + */ + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return addr; + + if (!(low & MCI_CONFIG_MCAX)) + return addr; + + if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + (low & MASK_BLKPTR_LO)) + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + + return addr; +} + static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block) { @@ -449,32 +478,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi } } - if (mce_flags.smca) { - if (smca_get_bank_type(bank) == SMCA_RESERVED) - return addr; - - if (!block) { - addr = MSR_AMD64_SMCA_MCx_MISC(bank); - } else { - /* - * For SMCA enabled processors, BLKPTR field of the - * first MISC register (MCx_MISC0) indicates presence of - * additional MISC register set (MISC1-4). - */ - u32 low, high; - - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return addr; - - if (!(low & MCI_CONFIG_MCAX)) - return addr; - - if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && - (low & MASK_BLKPTR_LO)) - addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); - } - return addr; - } + if (mce_flags.smca) + return smca_get_block_address(cpu, bank, block); /* Fall back to method we used for older processors: */ switch (block) { From 4cbe6caa4c6c3e9099d73160c59fe3c11ca5b3c5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 17 May 2018 10:46:26 +0200 Subject: [PATCH 15/59] x86/MCE/AMD: Cache SMCA MISC block addresses commit 78ce241099bb363b19dbd0245442e66c8de8f567 upstream. ... into a global, two-dimensional array and service subsequent reads from that cache to avoid rdmsr_on_cpu() calls during CPU hotplug (IPIs with IRQs disabled). In addition, this fixes a KASAN slab-out-of-bounds read due to wrong usage of the bank->blocks pointer. Fixes: 27bd59502702 ("x86/mce/AMD: Get address from already initialized block") Reported-by: Johannes Hirte Tested-by: Johannes Hirte Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20180414004230.GA2033@probook Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index b44e3e263abd..dbcb01006749 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = { [SMCA_SMU] = { "smu", "System Management Unit" }, }; +static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = +{ + [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } +}; + const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) @@ -441,20 +446,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); + /* Check our cache first: */ + if (smca_bank_addrs[bank][block] != -1) + return smca_bank_addrs[bank][block]; + /* * For SMCA enabled processors, BLKPTR field of the first MISC register * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). */ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return addr; + goto out; if (!(low & MCI_CONFIG_MCAX)) - return addr; + goto out; if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && (low & MASK_BLKPTR_LO)) - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); +out: + smca_bank_addrs[bank][block] = addr; return addr; } @@ -466,18 +477,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) return addr; - /* Get address from already initialized block. */ - if (per_cpu(threshold_banks, cpu)) { - struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank]; - - if (bankp && bankp->blocks) { - struct threshold_block *blockp = &bankp->blocks[block]; - - if (blockp) - return blockp->address; - } - } - if (mce_flags.smca) return smca_get_block_address(cpu, bank, block); From 085fc1967b56c2bed7f9ab05bbaeb7e9ca86d0b5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 31 May 2018 17:58:13 +0200 Subject: [PATCH 16/59] Revert "pinctrl: msm: Use dynamic GPIO numbering" This reverts commit bd36ea57d6d58041180c4f7d2c2bf5194c26845d which is commit a7aa75a2a7dba32594291a71c3704000a2fd7089 upstream. There's been too many complaints about this. Personally I think it's going to blow up when people hit this in mainline, but hey, it's not my systems. At least we don't have to backport the mess to the stable kernels to give them some more life to live unscathed :) Reported-by: Timur Tabi Reported-by: Sebastian Gottschall Cc: Bjorn Andersson Cc: Linus Walleij Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-msm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 19cd357bb464..ff491da64dab 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -818,7 +818,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return -EINVAL; chip = &pctrl->chip; - chip->base = -1; + chip->base = 0; chip->ngpio = ngpio; chip->label = dev_name(pctrl->dev); chip->parent = pctrl->dev; From 54978daa9dc55124986a07c2eccd76166a3fe8ef Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 15 Mar 2018 14:21:08 +0000 Subject: [PATCH 17/59] PCI: hv: Fix 2 hang issues in hv_compose_msi_msg() commit de0aa7b2f97d348ba7d1e17a00744c989baa0cb6 upstream. 1. With the patch "x86/vector/msi: Switch to global reservation mode", the recent v4.15 and newer kernels always hang for 1-vCPU Hyper-V VM with SR-IOV. This is because when we reach hv_compose_msi_msg() by request_irq() -> request_threaded_irq() ->__setup_irq()->irq_startup() -> __irq_startup() -> irq_domain_activate_irq() -> ... -> msi_domain_activate() -> ... -> hv_compose_msi_msg(), local irq is disabled in __setup_irq(). Note: when we reach hv_compose_msi_msg() by another code path: pci_enable_msix_range() -> ... -> irq_domain_activate_irq() -> ... -> hv_compose_msi_msg(), local irq is not disabled. hv_compose_msi_msg() depends on an interrupt from the host. With interrupts disabled, a UP VM always hangs in the busy loop in the function, because the interrupt callback hv_pci_onchannelcallback() can not be called. We can do nothing but work it around by polling the channel. This is ugly, but we don't have any other choice. 2. If the host is ejecting the VF device before we reach hv_compose_msi_msg(), in a UP VM, we can hang in hv_compose_msi_msg() forever, because at this time the host doesn't respond to the CREATE_INTERRUPT request. This issue exists the first day the pci-hyperv driver appears in the kernel. Luckily, this can also by worked around by polling the channel for the PCI_EJECT message and hpdev->state, and by checking the PCI vendor ID. Note: actually the above 2 issues also happen to a SMP VM, if "hbus->hdev->channel->target_cpu == smp_processor_id()" is true. Fixes: 4900be83602b ("x86/vector/msi: Switch to global reservation mode") Tested-by: Adrian Suhov Tested-by: Chris Valean Signed-off-by: Dexuan Cui Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley Acked-by: Haiyang Zhang Cc: Cc: Stephen Hemminger Cc: K. Y. Srinivasan Cc: Vitaly Kuznetsov Cc: Jack Morgenstein Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-hyperv.c | 58 ++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 73b724143be0..c91662927de0 100644 --- a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -531,6 +531,8 @@ struct hv_pci_compl { s32 completion_status; }; +static void hv_pci_onchannelcallback(void *context); + /** * hv_pci_generic_compl() - Invoked for a completion packet * @context: Set up by the sender of the packet. @@ -675,6 +677,31 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, } } +static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) +{ + u16 ret; + unsigned long flags; + void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + + PCI_VENDOR_ID; + + spin_lock_irqsave(&hpdev->hbus->config_lock, flags); + + /* Choose the function to be read. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); + /* Make sure the function was chosen before we start reading. */ + mb(); + /* Read from that function's config space. */ + ret = readw(addr); + /* + * mb() is not required here, because the spin_unlock_irqrestore() + * is a barrier. + */ + + spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + + return ret; +} + /** * _hv_pcifront_write_config() - Internal PCI config write * @hpdev: The PCI driver's representation of the device @@ -1121,8 +1148,37 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) * Since this function is called with IRQ locks held, can't * do normal wait for completion; instead poll. */ - while (!try_wait_for_completion(&comp.comp_pkt.host_event)) + while (!try_wait_for_completion(&comp.comp_pkt.host_event)) { + /* 0xFFFF means an invalid PCI VENDOR ID. */ + if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) { + dev_err_once(&hbus->hdev->device, + "the device has gone\n"); + goto free_int_desc; + } + + /* + * When the higher level interrupt code calls us with + * interrupt disabled, we must poll the channel by calling + * the channel callback directly when channel->target_cpu is + * the current CPU. When the higher level interrupt code + * calls us with interrupt enabled, let's add the + * local_bh_disable()/enable() to avoid race. + */ + local_bh_disable(); + + if (hbus->hdev->channel->target_cpu == smp_processor_id()) + hv_pci_onchannelcallback(hbus); + + local_bh_enable(); + + if (hpdev->state == hv_pcichild_ejecting) { + dev_err_once(&hbus->hdev->device, + "the device is being ejected\n"); + goto free_int_desc; + } + udelay(100); + } if (comp.comp_pkt.completion_status < 0) { dev_err(&hbus->hdev->device, From d9a59eac3fd67c559df87c6cf6ab07298012d197 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 6 Mar 2018 17:08:32 -0800 Subject: [PATCH 18/59] xfs: convert XFS_AGFL_SIZE to a helper function commit a78ee256c325ecfaec13cafc41b315bd4e1dd518 upstream. The AGFL size calculation is about to get more complex, so lets turn the macro into a function first and remove the macro. Signed-off-by: Dave Chinner [darrick: forward port to newer kernel, simplify the helper] Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 31 ++++++++++++++++++++++++------- fs/xfs/libxfs/xfs_alloc.h | 2 ++ fs/xfs/libxfs/xfs_format.h | 13 +------------ fs/xfs/xfs_fsops.c | 2 +- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index f965ce832bc0..8164eeb8db28 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -52,6 +52,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +/* + * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in + * the beginning of the block for a proper header with the location information + * and CRC. + */ +unsigned int +xfs_agfl_size( + struct xfs_mount *mp) +{ + unsigned int size = mp->m_sb.sb_sectsize; + + if (xfs_sb_version_hascrc(&mp->m_sb)) + size -= sizeof(struct xfs_agfl); + + return size / sizeof(xfs_agblock_t); +} + unsigned int xfs_refc_block( struct xfs_mount *mp) @@ -540,7 +557,7 @@ xfs_agfl_verify( if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) return false; - for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { + for (i = 0; i < xfs_agfl_size(mp); i++) { if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) return false; @@ -2252,7 +2269,7 @@ xfs_alloc_get_freelist( bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); - if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) + if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); @@ -2363,7 +2380,7 @@ xfs_alloc_put_freelist( be32_to_cpu(agf->agf_seqno), &agflbp))) return error; be32_add_cpu(&agf->agf_fllast, 1); - if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) + if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); @@ -2381,7 +2398,7 @@ xfs_alloc_put_freelist( xfs_alloc_log_agf(tp, agbp, logflags); - ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); + ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; @@ -2414,9 +2431,9 @@ xfs_agf_verify( if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) + be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && + be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && + be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) return false; if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index ef26edc2e938..346ba8ab68b5 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -26,6 +26,8 @@ struct xfs_trans; extern struct workqueue_struct *xfs_alloc_wq; +unsigned int xfs_agfl_size(struct xfs_mount *mp); + /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 23229f0c5b15..ed4481b2f113 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -798,24 +798,13 @@ typedef struct xfs_agi { &(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \ (__be32 *)(bp)->b_addr) -/* - * Size of the AGFL. For CRC-enabled filesystes we steal a couple of - * slots in the beginning of the block for a proper header with the - * location information and CRC. - */ -#define XFS_AGFL_SIZE(mp) \ - (((mp)->m_sb.sb_sectsize - \ - (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \ - sizeof(struct xfs_agfl) : 0)) / \ - sizeof(xfs_agblock_t)) - typedef struct xfs_agfl { __be32 agfl_magicnum; __be32 agfl_seqno; uuid_t agfl_uuid; __be64 agfl_lsn; __be32 agfl_crc; - __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ + __be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */ } __attribute__((packed)) xfs_agfl_t; #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8f22fc579dbb..40783a313df9 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -294,7 +294,7 @@ xfs_growfs_data_private( } agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); - for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) + for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(bp); From d1db300b8ffc4f644067e3444a5a9293d05f9441 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 15 Mar 2018 10:51:58 -0700 Subject: [PATCH 19/59] xfs: detect agfl count corruption and reset agfl commit a27ba2607e60312554cbcd43fc660b2c7f29dc9c upstream. The struct xfs_agfl v5 header was originally introduced with unexpected padding that caused the AGFL to operate with one less slot than intended. The header has since been packed, but the fix left an incompatibility for users who upgrade from an old kernel with the unpacked header to a newer kernel with the packed header while the AGFL happens to wrap around the end. The newer kernel recognizes one extra slot at the physical end of the AGFL that the previous kernel did not. The new kernel will eventually attempt to allocate a block from that slot, which contains invalid data, and cause a crash. This condition can be detected by comparing the active range of the AGFL to the count. While this detects a padding mismatch, it can also trigger false positives for unrelated flcount corruption. Since we cannot distinguish a size mismatch due to padding from unrelated corruption, we can't trust the AGFL enough to simply repopulate the empty slot. Instead, avoid unnecessarily complex detection logic and and use a solution that can handle any form of flcount corruption that slips through read verifiers: distrust the entire AGFL and reset it to an empty state. Any valid blocks within the AGFL are intentionally leaked. This requires xfs_repair to rectify (which was already necessary based on the state the AGFL was found in). The reset mitigates the side effect of the padding mismatch problem from a filesystem crash to a free space accounting inconsistency. The generic approach also means that this patch can be safely backported to kernels with or without a packed struct xfs_agfl. Check the AGF for an invalid freelist count on initial read from disk. If detected, set a flag on the xfs_perag to indicate that a reset is required before the AGFL can be used. In the first transaction that attempts to use a flagged AGFL, reset it to empty, warn the user about the inconsistency and allow the freelist fixup code to repopulate the AGFL with new blocks. The xfs_perag flag is cleared to eliminate the need for repeated checks on each block allocation operation. This allows kernels that include the packing fix commit 96f859d52bcb ("libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct") to handle older unpacked AGFL formats without a filesystem crash. Suggested-by: Dave Chinner Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Reviewed-by Dave Chiluk Signed-off-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 94 +++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_trace.h | 9 +++- 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 8164eeb8db28..516e0c57cf9c 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2056,6 +2056,93 @@ xfs_alloc_space_available( return true; } +/* + * Check the agfl fields of the agf for inconsistency or corruption. The purpose + * is to detect an agfl header padding mismatch between current and early v5 + * kernels. This problem manifests as a 1-slot size difference between the + * on-disk flcount and the active [first, last] range of a wrapped agfl. This + * may also catch variants of agfl count corruption unrelated to padding. Either + * way, we'll reset the agfl and warn the user. + * + * Return true if a reset is required before the agfl can be used, false + * otherwise. + */ +static bool +xfs_agfl_needs_reset( + struct xfs_mount *mp, + struct xfs_agf *agf) +{ + uint32_t f = be32_to_cpu(agf->agf_flfirst); + uint32_t l = be32_to_cpu(agf->agf_fllast); + uint32_t c = be32_to_cpu(agf->agf_flcount); + int agfl_size = xfs_agfl_size(mp); + int active; + + /* no agfl header on v4 supers */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + + /* + * The agf read verifier catches severe corruption of these fields. + * Repeat some sanity checks to cover a packed -> unpacked mismatch if + * the verifier allows it. + */ + if (f >= agfl_size || l >= agfl_size) + return true; + if (c > agfl_size) + return true; + + /* + * Check consistency between the on-disk count and the active range. An + * agfl padding mismatch manifests as an inconsistent flcount. + */ + if (c && l >= f) + active = l - f + 1; + else if (c) + active = agfl_size - f + l + 1; + else + active = 0; + + return active != c; +} + +/* + * Reset the agfl to an empty state. Ignore/drop any existing blocks since the + * agfl content cannot be trusted. Warn the user that a repair is required to + * recover leaked blocks. + * + * The purpose of this mechanism is to handle filesystems affected by the agfl + * header padding mismatch problem. A reset keeps the filesystem online with a + * relatively minor free space accounting inconsistency rather than suffer the + * inevitable crash from use of an invalid agfl block. + */ +static void +xfs_agfl_reset( + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_perag *pag) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + + ASSERT(pag->pagf_agflreset); + trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); + + xfs_warn(mp, + "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " + "Please unmount and run xfs_repair.", + pag->pag_agno, pag->pagf_flcount); + + agf->agf_flfirst = 0; + agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); + agf->agf_flcount = 0; + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | + XFS_AGF_FLCOUNT); + + pag->pagf_flcount = 0; + pag->pagf_agflreset = false; +} + /* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. @@ -2117,6 +2204,10 @@ xfs_alloc_fix_freelist( } } + /* reset a padding mismatched agfl before final free space check */ + if (pag->pagf_agflreset) + xfs_agfl_reset(tp, agbp, pag); + /* If there isn't enough total space or single-extent, reject it. */ need = xfs_alloc_min_freelist(mp, pag); if (!xfs_alloc_space_available(args, need, flags)) @@ -2273,6 +2364,7 @@ xfs_alloc_get_freelist( agf->agf_flfirst = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; @@ -2384,6 +2476,7 @@ xfs_alloc_put_freelist( agf->agf_fllast = 0; pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); + ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; @@ -2589,6 +2682,7 @@ xfs_alloc_read_agf( pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; pag->pagf_init = 1; + pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); } #ifdef DEBUG else if (!XFS_FORCED_SHUTDOWN(mp)) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e0792d036be2..d359a88ea249 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -353,6 +353,7 @@ typedef struct xfs_perag { char pagi_inodeok; /* The agi is ok for inodes */ uint8_t pagf_levels[XFS_BTNUM_AGF]; /* # of levels in bno & cnt btree */ + bool pagf_agflreset; /* agfl requires reset before use */ uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index bb5514688d47..06bc87369632 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1513,7 +1513,7 @@ TRACE_EVENT(xfs_extent_busy_trim, __entry->tlen) ); -TRACE_EVENT(xfs_agf, +DECLARE_EVENT_CLASS(xfs_agf_class, TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, unsigned long caller_ip), TP_ARGS(mp, agf, flags, caller_ip), @@ -1569,6 +1569,13 @@ TRACE_EVENT(xfs_agf, __entry->longest, (void *)__entry->caller_ip) ); +#define DEFINE_AGF_EVENT(name) \ +DEFINE_EVENT(xfs_agf_class, name, \ + TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agf, flags, caller_ip)) +DEFINE_AGF_EVENT(xfs_agf); +DEFINE_AGF_EVENT(xfs_agfl_reset); TRACE_EVENT(xfs_free_extent, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, From 88859f6cc5c41ec8235a9eb36f6c3f2a13f3ee26 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 18 Aug 2017 12:08:13 -0700 Subject: [PATCH 20/59] Input: synaptics - Lenovo Carbon X1 Gen5 (2017) devices should use RMI commit 9b2071028f8def49971a3b213ab6efd02a7e56e8 upstream. The touchpad on Lenovo Carbon X1 Gen 5 (2017 - Kabylake) is accessible over SMBUS/RMI, so let's activate it by default. Cc: stable@vger.kernel.org Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index a246fc686bb7..0eca3fd9e5f3 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,6 +172,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN200f", /* T450s */ NULL }; From af504c5a88b3fca04855dd0868d33b7ec3025261 Mon Sep 17 00:00:00 2001 From: Edvard Holst Date: Sat, 3 Feb 2018 11:46:15 -0800 Subject: [PATCH 21/59] Input: synaptics - Lenovo Thinkpad X1 Carbon G5 (2017) with Elantech trackpoints should use RMI commit 15e2cffec3aa0d47a8d75ae80e1b136bfb5dff30 upstream. Lenovo use two different trackpoints in the fifth generation Thinkpad X1 Carbon. Both are accessible over SMBUS/RMI but the pnpIDs are missing. This patch is for the Elantech trackpoint specifically which also reports SMB version 3 so rmi_smbus needs to be updated in order to handle it. For the record, I was not the first one to come up with this patch as it has been floating around the internet for a while now. However, I have spent significant time with testing and my efforts to find the original author of the patch have been unsuccessful. Signed-off-by: Edvard Holst Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 0eca3fd9e5f3..d2b14feef2db 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -173,6 +173,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0046", /* X250 */ "LEN004a", /* W541 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ + "LEN0073", /* X1 Carbon G5 (Elantech) */ "LEN200f", /* T450s */ NULL }; From 9c707c93e179ca03b593d4f1c3789dbfe94d5157 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sat, 3 Feb 2018 11:49:22 -0800 Subject: [PATCH 22/59] Input: synaptics - add Intertouch support on X1 Carbon 6th and X280 commit 5717a09aeaf62d197deba1fc7ccd6bc45f3a9dcc upstream. Synaptics devices reported it has Intertouch support, and it fails via PS/2 as following logs: psmouse serio2: Failed to reset mouse on synaptics-pt/serio0 psmouse serio2: Failed to enable mouse on synaptics-pt/serio0 Set these new devices to use SMBus to fix this issue, then they report SMBus version 3 is using, patch: https://patchwork.kernel.org/patch/9989547/ enabled SMBus ver 3 and makes synaptics devices work fine on SMBus mode. Signed-off-by: Aaron Ma Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index d2b14feef2db..16a0387e6543 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -174,6 +174,8 @@ static const char * const smbus_pnp_ids[] = { "LEN004a", /* W541 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ + "LEN0092", /* X1 Carbon 6 */ + "LEN0096", /* X280 */ "LEN200f", /* T450s */ NULL }; From 9a85abc794831bc550de3e616588efff14d2c60a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 22 May 2018 17:16:08 -0700 Subject: [PATCH 23/59] Input: synaptics - add Lenovo 80 series ids to SMBus commit ad8fb554f04e38f155c9bc34bbf521fc592ceee7 upstream. This time, Lenovo decided to go with different pieces in its latest series of Thinkpads. For those we have been able to test: - the T480 is using Synaptics with an IBM trackpoint -> it behaves properly with or without intertouch, there is no point not using RMI4 - the X1 Carbon 6th gen is using Synaptics with an IBM trackpoint -> the touchpad doesn't behave properly under PS/2 so we have to switch it to RMI4 if we do not want to have disappointed users - the X280 is using Synaptics with an ALPS trackpoint -> the recent fixes in the trackpoint handling fixed it so upstream now works fine with or without RMI4, and there is no point not using RMI4 - the T480s is using an Elan touchpad, so that's a different story Cc: # v4.14.x, v4.15.x, v4.16.x Signed-off-by: Benjamin Tissoires Acked-by: KT Liao Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 16a0387e6543..6c4bbd38700e 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -172,10 +172,12 @@ static const char * const smbus_pnp_ids[] = { "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN004a", /* W541 */ + "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ "LEN0092", /* X1 Carbon 6 */ "LEN0096", /* X280 */ + "LEN0097", /* X280 -> ALPS trackpoint */ "LEN200f", /* T450s */ NULL }; From 2be683020be4bd8032ae07f6974656daa66f378d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 22 May 2018 17:19:57 -0700 Subject: [PATCH 24/59] Input: elan_i2c_smbus - fix corrupted stack commit 40f7090bb1b4ec327ea1e1402ff5783af5b35195 upstream. New ICs (like the one on the Lenovo T480s) answer to ETP_SMBUS_IAP_VERSION_CMD 4 bytes instead of 3. This corrupts the stack as i2c_smbus_read_block_data() uses the values returned by the i2c device to know how many data it need to return. i2c_smbus_read_block_data() can read up to 32 bytes (I2C_SMBUS_BLOCK_MAX) and there is no safeguard on how many bytes are provided in the return value. Ensure we always have enough space for any future firmware. Also 0-initialize the values to prevent any access to uninitialized memory. Cc: # v4.4.x, v4.9.x, v4.14.x, v4.15.x, v4.16.x Signed-off-by: Benjamin Tissoires Acked-by: KT Liao Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_smbus.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c index 29f99529b187..cfcb32559925 100644 --- a/drivers/input/mouse/elan_i2c_smbus.c +++ b/drivers/input/mouse/elan_i2c_smbus.c @@ -130,7 +130,7 @@ static int elan_smbus_get_baseline_data(struct i2c_client *client, bool max_baseline, u8 *value) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, max_baseline ? @@ -149,7 +149,7 @@ static int elan_smbus_get_version(struct i2c_client *client, bool iap, u8 *version) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, iap ? ETP_SMBUS_IAP_VERSION_CMD : @@ -170,7 +170,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, u8 *clickpad) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_SM_VERSION_CMD, val); @@ -188,7 +188,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, static int elan_smbus_get_product_id(struct i2c_client *client, u16 *id) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_UNIQUEID_CMD, val); @@ -205,7 +205,7 @@ static int elan_smbus_get_checksum(struct i2c_client *client, bool iap, u16 *csum) { int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, iap ? ETP_SMBUS_FW_CHECKSUM_CMD : @@ -226,7 +226,7 @@ static int elan_smbus_get_max(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RANGE_CMD, val); if (ret != 3) { @@ -246,7 +246,7 @@ static int elan_smbus_get_resolution(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_RESOLUTION_CMD, val); if (ret != 3) { @@ -267,7 +267,7 @@ static int elan_smbus_get_num_traces(struct i2c_client *client, { int ret; int error; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; ret = i2c_smbus_read_block_data(client, ETP_SMBUS_XY_TRACENUM_CMD, val); if (ret != 3) { @@ -294,7 +294,7 @@ static int elan_smbus_iap_get_mode(struct i2c_client *client, { int error; u16 constant; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; error = i2c_smbus_read_block_data(client, ETP_SMBUS_IAP_CTRL_CMD, val); if (error < 0) { @@ -345,7 +345,7 @@ static int elan_smbus_prepare_fw_update(struct i2c_client *client) int len; int error; enum tp_mode mode; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; u8 cmd[4] = {0x0F, 0x78, 0x00, 0x06}; u16 password; @@ -419,7 +419,7 @@ static int elan_smbus_write_fw_block(struct i2c_client *client, struct device *dev = &client->dev; int error; u16 result; - u8 val[3]; + u8 val[I2C_SMBUS_BLOCK_MAX] = {0}; /* * Due to the limitation of smbus protocol limiting From 8441a0014a29e4230d492947e5599415f219d9a9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 27 May 2018 20:54:44 -0400 Subject: [PATCH 25/59] tracing: Fix crash when freeing instances with event triggers commit 86b389ff22bd6ad8fd3cb98e41cd271886c6d023 upstream. If a instance has an event trigger enabled when it is freed, it could cause an access of free memory. Here's the case that crashes: # cd /sys/kernel/tracing # mkdir instances/foo # echo snapshot > instances/foo/events/initcall/initcall_start/trigger # rmdir instances/foo Would produce: general protection fault: 0000 [#1] PREEMPT SMP PTI Modules linked in: tun bridge ... CPU: 5 PID: 6203 Comm: rmdir Tainted: G W 4.17.0-rc4-test+ #933 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:clear_event_triggers+0x3b/0x70 RSP: 0018:ffffc90003783de0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b2b RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800c7130ba0 RBP: ffffc90003783e00 R08: ffff8801131993f8 R09: 0000000100230016 R10: ffffc90003783d80 R11: 0000000000000000 R12: ffff8800c7130ba0 R13: ffff8800c7130bd8 R14: ffff8800cc093768 R15: 00000000ffffff9c FS: 00007f6f4aa86700(0000) GS:ffff88011eb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6f4a5aed60 CR3: 00000000cd552001 CR4: 00000000001606e0 Call Trace: event_trace_del_tracer+0x2a/0xc5 instance_rmdir+0x15c/0x200 tracefs_syscall_rmdir+0x52/0x90 vfs_rmdir+0xdb/0x160 do_rmdir+0x16d/0x1c0 __x64_sys_rmdir+0x17/0x20 do_syscall_64+0x55/0x1a0 entry_SYSCALL_64_after_hwframe+0x49/0xbe This was due to the call the clears out the triggers when an instance is being deleted not removing the trigger from the link list. Cc: stable@vger.kernel.org Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_trigger.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f2ac9d44f6c4..c7c12f82095c 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -482,9 +482,10 @@ clear_event_triggers(struct trace_array *tr) struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { - struct event_trigger_data *data; - list_for_each_entry_rcu(data, &file->triggers, list) { + struct event_trigger_data *data, *n; + list_for_each_entry_safe(data, n, &file->triggers, list) { trace_event_trigger_enable_disable(file, 0); + list_del_rcu(&data->list); if (data->ops->free) data->ops->free(data->ops, data); } From c6a95f37d3a0539424b66c2e31b657b10aaa2b53 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 28 May 2018 10:56:36 -0400 Subject: [PATCH 26/59] tracing: Make the snapshot trigger work with instances commit 2824f5033248600673e3e126a4d135363cbfd9ac upstream. The snapshot trigger currently only affects the main ring buffer, even when it is used by the instances. This can be confusing as the snapshot trigger is listed in the instance. > # cd /sys/kernel/tracing > # mkdir instances/foo > # echo snapshot > instances/foo/events/syscalls/sys_enter_fchownat/trigger > # echo top buffer > trace_marker > # echo foo buffer > instances/foo/trace_marker > # touch /tmp/bar > # chown rostedt /tmp/bar > # cat instances/foo/snapshot # tracer: nop # # # * Snapshot is freed * # # Snapshot commands: # echo 0 > snapshot : Clears and frees snapshot buffer # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated. # Takes a snapshot of the main buffer. # echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free) # (Doesn't have to be '2' works with any number that # is not a '0' or '1') > # cat snapshot # tracer: nop # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | bash-1189 [000] .... 111.488323: tracing_mark_write: top buffer Not only did the snapshot occur in the top level buffer, but the instance snapshot buffer should have been allocated, and it is still free. Cc: stable@vger.kernel.org Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 12 ++++++------ kernel/trace/trace.h | 11 +++++++++++ kernel/trace/trace_events_trigger.c | 10 ++++++++-- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 76bcc80b893e..520ecaf61dc4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -894,7 +894,7 @@ int __trace_bputs(unsigned long ip, const char *str) EXPORT_SYMBOL_GPL(__trace_bputs); #ifdef CONFIG_TRACER_SNAPSHOT -static void tracing_snapshot_instance(struct trace_array *tr) +void tracing_snapshot_instance(struct trace_array *tr) { struct tracer *tracer = tr->current_trace; unsigned long flags; @@ -950,7 +950,7 @@ static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, struct trace_buffer *size_buf, int cpu_id); static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); -static int alloc_snapshot(struct trace_array *tr) +int tracing_alloc_snapshot_instance(struct trace_array *tr) { int ret; @@ -996,7 +996,7 @@ int tracing_alloc_snapshot(void) struct trace_array *tr = &global_trace; int ret; - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); WARN_ON(ret < 0); return ret; @@ -5400,7 +5400,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) #ifdef CONFIG_TRACER_MAX_TRACE if (t->use_max_tr && !had_max_tr) { - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; } @@ -6378,7 +6378,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, } #endif if (!tr->allocated_snapshot) { - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) break; } @@ -7099,7 +7099,7 @@ ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, return ret; out_reg: - ret = alloc_snapshot(tr); + ret = tracing_alloc_snapshot_instance(tr); if (ret < 0) goto out; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 401b0639116f..851cd1605085 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1807,6 +1807,17 @@ static inline void __init trace_event_init(void) { } static inline void trace_event_eval_update(struct trace_eval_map **map, int len) { } #endif +#ifdef CONFIG_TRACER_SNAPSHOT +void tracing_snapshot_instance(struct trace_array *tr); +int tracing_alloc_snapshot_instance(struct trace_array *tr); +#else +static inline void tracing_snapshot_instance(struct trace_array *tr) { } +static inline int tracing_alloc_snapshot_instance(struct trace_array *tr) +{ + return 0; +} +#endif + extern struct trace_iterator *tracepoint_print_iter; #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index c7c12f82095c..b413fab7d75b 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -642,6 +642,7 @@ event_trigger_callback(struct event_command *cmd_ops, trigger_data->count = -1; trigger_data->ops = trigger_ops; trigger_data->cmd_ops = cmd_ops; + trigger_data->private_data = file; INIT_LIST_HEAD(&trigger_data->list); INIT_LIST_HEAD(&trigger_data->named_list); @@ -1042,7 +1043,12 @@ static struct event_command trigger_traceoff_cmd = { static void snapshot_trigger(struct event_trigger_data *data, void *rec) { - tracing_snapshot(); + struct trace_event_file *file = data->private_data; + + if (file) + tracing_snapshot_instance(file->tr); + else + tracing_snapshot(); } static void @@ -1064,7 +1070,7 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, { int ret = register_trigger(glob, ops, data, file); - if (ret > 0 && tracing_alloc_snapshot() != 0) { + if (ret > 0 && tracing_alloc_snapshot_instance(file->tr) != 0) { unregister_trigger(glob, ops, data, file); ret = 0; } From 9808c97d3cb4f31629e16fbd2673a600bef3235d Mon Sep 17 00:00:00 2001 From: Sachin Grover Date: Fri, 25 May 2018 14:01:39 +0530 Subject: [PATCH 27/59] selinux: KASAN: slab-out-of-bounds in xattr_getsecurity commit efe3de79e0b52ca281ef6691480c8c68c82a4657 upstream. Call trace: [] dump_backtrace+0x0/0x428 [] show_stack+0x28/0x38 [] dump_stack+0xd4/0x124 [] print_address_description+0x68/0x258 [] kasan_report.part.2+0x228/0x2f0 [] kasan_report+0x5c/0x70 [] check_memory_region+0x12c/0x1c0 [] memcpy+0x34/0x68 [] xattr_getsecurity+0xe0/0x160 [] vfs_getxattr+0xc8/0x120 [] getxattr+0x100/0x2c8 [] SyS_fgetxattr+0x64/0xa0 [] el0_svc_naked+0x24/0x28 If user get root access and calls security.selinux setxattr() with an embedded NUL on a file and then if some process performs a getxattr() on that file with a length greater than the actual length of the string, it would result in a panic. To fix this, add the actual length of the string to the security context instead of the length passed by the userspace process. Signed-off-by: Sachin Grover Cc: stable@vger.kernel.org Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index c9c031e3d1ae..b275743e23cc 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1448,7 +1448,7 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, scontext_len, &context, def_sid); if (rc == -EINVAL && force) { context.str = str; - context.len = scontext_len; + context.len = strlen(str) + 1; str = NULL; } else if (rc) goto out_unlock; From bc342bc0295422962fbbdd5aa1dd17b3bb7345bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 14 May 2018 20:09:24 -0700 Subject: [PATCH 28/59] cfg80211: further limit wiphy names to 64 bytes commit 814596495dd2b9d4aab92d8f89cf19060d25d2ea upstream. wiphy names were recently limited to 128 bytes by commit a7cfebcb7594 ("cfg80211: limit wiphy names to 128 bytes"). As it turns out though, this isn't sufficient because dev_vprintk_emit() needs the syslog header string "SUBSYSTEM=ieee80211\0DEVICE=+ieee80211:$devname" to fit into 128 bytes. This triggered the "device/subsystem name too long" WARN when the device name was >= 90 bytes. As before, this was reproduced by syzbot by sending an HWSIM_CMD_NEW_RADIO command to the MAC80211_HWSIM generic netlink family. Fix it by further limiting wiphy names to 64 bytes. Reported-by: syzbot+e64565577af34b3768dc@syzkaller.appspotmail.com Fixes: a7cfebcb7594 ("cfg80211: limit wiphy names to 128 bytes") Signed-off-by: Eric Biggers Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3fab6c81917f..f41ea5af22ee 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2604,7 +2604,7 @@ enum nl80211_attrs { #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS -#define NL80211_WIPHY_NAME_MAXLEN 128 +#define NL80211_WIPHY_NAME_MAXLEN 64 #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 From 151b144bc60268db7dfe3ef91ce5a4cb756677ae Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 7 Oct 2017 13:23:23 -0700 Subject: [PATCH 29/59] kbuild: clang: remove crufty HOSTCFLAGS commit df16aaac26e92e97ab7234d3f93c953466adc4b5 upstream. When compiling with `make CC=clang HOSTCC=clang`, I was seeing warnings that clang did not recognize -fno-delete-null-pointer-checks for HOSTCC targets. These were added in commit 61163efae020 ("kbuild: LLVMLinux: Add Kbuild support for building kernel with Clang"). Clang does not support -fno-delete-null-pointer-checks, so adding it to HOSTCFLAGS if HOSTCC is clang does not make sense. It's not clear why the other warnings were disabled, and just for HOSTCFLAGS, but I can remove them, add -Werror to HOSTCFLAGS and compile with clang just fine. Suggested-by: Masahiro Yamada Signed-off-by: Nick Desaulniers Signed-off-by: Masahiro Yamada Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Makefile b/Makefile index d6db01a02252..4147a4bf6b98 100644 --- a/Makefile +++ b/Makefile @@ -369,11 +369,6 @@ HOSTCXXFLAGS := -O2 $(HOST_LFS_CFLAGS) HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) HOST_LOADLIBES := $(HOST_LFS_LIBS) -ifeq ($(shell $(HOSTCC) -v 2>&1 | grep -c "clang version"), 1) -HOSTCFLAGS += -Wno-unused-value -Wno-unused-parameter \ - -Wno-missing-field-initializers -fno-delete-null-pointer-checks -endif - # Make variables (CC, etc...) AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld From 5a92c6e3e2f3616cb801e33950dc8ec7d25dee0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 26 Aug 2017 12:09:33 +0100 Subject: [PATCH 30/59] drm/i915: Always sanity check engine state upon idling commit cad9946c2a4375386062131858881cfd30fc1b8f upstream. When we do a locked idle we know that afterwards all requests have been completed and the engines have been cleared of tasks. For whatever reason, this doesn't always happen and we may go into a suspend with ELSP still full, and this causes an issue upon resume as we get very, very confused. If the engines refuse to idle, mark the device as wedged. In the process we get rid of the maybe unused open-coded version of wait_for_engines reported by Nick Desaulniers and Matthias Kaehlcke. v2: Suppress the -EIO before suspend, but keep it for seqno wrap. References: https://bugs.freedesktop.org/show_bug.cgi?id=101891 References: https://bugs.freedesktop.org/show_bug.cgi?id=102456 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Matthias Kaehlcke Link: https://patchwork.freedesktop.org/patch/msgid/20170826110935.10237-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3b2c0538e48d..90359c7954c8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3378,24 +3378,12 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) return 0; } -static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms) -{ - return wait_for(intel_engine_is_idle(engine), timeout_ms); -} - static int wait_for_engines(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - if (GEM_WARN_ON(wait_for_engine(engine, 50))) { - i915_gem_set_wedged(i915); - return -EIO; - } - - GEM_BUG_ON(intel_engine_get_seqno(engine) != - intel_engine_last_submit(engine)); + if (wait_for(intel_engines_are_idle(i915), 50)) { + DRM_ERROR("Failed to idle engines, declaring wedged!\n"); + i915_gem_set_wedged(i915); + return -EIO; } return 0; @@ -4575,7 +4563,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); - if (ret) + if (ret && ret != -EIO) goto err_unlock; assert_kernel_context_is_current(dev_priv); @@ -4619,11 +4607,12 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machine in an unusable condition. */ i915_gem_sanitize(dev_priv); - goto out_rpm_put; + + intel_runtime_pm_put(dev_priv); + return 0; err_unlock: mutex_unlock(&dev->struct_mutex); -out_rpm_put: intel_runtime_pm_put(dev_priv); return ret; } From 074e30a3fc09e7289143fd1ad2ae6edd126ee3ee Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 15 Sep 2017 00:05:16 +0100 Subject: [PATCH 31/59] dma-buf: remove redundant initialization of sg_table commit 531beb067c6185aceabfdee0965234c6a8fd133b upstream. sg_table is being initialized and is never read before it is updated again later on, hence making the initialization redundant. Remove the initialization. Detected by clang scan-build: "warning: Value stored to 'sg_table' during its initialization is never read" Signed-off-by: Colin Ian King Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170914230516.6056-1-colin.king@canonical.com Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/dma-buf/dma-buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 4a038dcf5361..bc1cb284111c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(dma_buf_detach); struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, enum dma_data_direction direction) { - struct sg_table *sg_table = ERR_PTR(-EINVAL); + struct sg_table *sg_table; might_sleep(); From 8524af02d13227a2ed881e7ba07974646a83b947 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 7 Feb 2018 10:58:43 -0800 Subject: [PATCH 32/59] drm/amd/powerplay: Fix enum mismatch commit 42b5122e828a6ccd9952ad3116343dc032d33efe upstream. In several locations the driver uses AMD_CG_STATE_UNGATE (type enum amd_clockgating_state) instead of AMD_PG_STATE_UNGATE (type enum amd_powergating_stat) and vice versa. Both constants have the same value, so this doesn't cause any problems, but we still want to pass the correct type. Fixing the mismatch resolves multiple warnings like this when building with clang: drivers/gpu/drm/amd/amdgpu/../powerplay/hwmgr/cz_clockpowergating.c:169:7: error: implicit conversion from enumeration type 'enum amd_powergating_state' to different enumeration type 'enum amd_clockgating_state' [-Werror,-Wenum-conversion] AMD_PG_STATE_UNGATE); Reviewed-by: Guenter Roeck Signed-off-by: Matthias Kaehlcke Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c | 8 ++++---- .../gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c index b33935fcf428..e6c6994e74ba 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c @@ -176,10 +176,10 @@ int cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) cz_dpm_powerup_uvd(hwmgr); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cz_dpm_update_uvd_dpm(hwmgr, false); } @@ -208,11 +208,11 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) cgs_set_clockgating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cgs_set_powergating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cz_dpm_update_vce_dpm(hwmgr); cz_enable_disable_vce_dpm(hwmgr, true); return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 261b828ad590..2f3509be226f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -162,7 +162,7 @@ int smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) AMD_CG_STATE_UNGATE); cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); smu7_update_uvd_dpm(hwmgr, false); } From bcc9c6f0320189274e6ef6f9062d650b8e432065 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 8 Feb 2018 16:57:12 -0800 Subject: [PATCH 33/59] rtlwifi: rtl8192cu: Remove variable self-assignment in rf.c commit fb239c1209bb0f0b4830cc72507cc2f2d63fadbd upstream. In _rtl92c_get_txpower_writeval_by_regulatory() the variable writeVal is assigned to itself in an if ... else statement, apparently only to document that the branch condition is handled and that a previously read value should be returned unmodified. The self-assignment causes clang to raise the following warning: drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c:304:13: error: explicitly assigning value of variable of type 'u32' (aka 'unsigned int') to itself [-Werror,-Wself-assign] writeVal = writeVal; Delete the branch with the self-assignment. Signed-off-by: Matthias Kaehlcke Acked-by: Larry Finger Reviewed-by: Guenter Roeck Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c index 9cff6bc4049c..cf551785eb08 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/rf.c @@ -299,9 +299,6 @@ static void _rtl92c_get_txpower_writeval_by_regulatory(struct ieee80211_hw *hw, writeVal = 0x00000000; if (rtlpriv->dm.dynamic_txhighpower_lvl == TXHIGHPWRLEVEL_BT1) writeVal = writeVal - 0x06060606; - else if (rtlpriv->dm.dynamic_txhighpower_lvl == - TXHIGHPWRLEVEL_BT2) - writeVal = writeVal; *(p_outwriteval + rf) = writeVal; } } From a81920c73eb04fd63f7a52ffe22e210c96414c90 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 19 Oct 2017 14:33:52 +0200 Subject: [PATCH 34/59] ASoC: Intel: sst: remove redundant variable dma_dev_name commit 271ef65b5882425d500e969e875c98e47a6b0c86 upstream. The pointer dma_dev_name is assigned but never read, it is redundant and can therefore be removed. Cleans up clang warning: sound/soc/intel/common/sst-firmware.c:288:3: warning: Value stored to 'dma_dev_name' is never read Signed-off-by: Colin Ian King Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/common/sst-firmware.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/intel/common/sst-firmware.c b/sound/soc/intel/common/sst-firmware.c index a086c35f91bb..79a9fdf94d38 100644 --- a/sound/soc/intel/common/sst-firmware.c +++ b/sound/soc/intel/common/sst-firmware.c @@ -274,7 +274,6 @@ int sst_dma_new(struct sst_dsp *sst) struct sst_pdata *sst_pdata = sst->pdata; struct sst_dma *dma; struct resource mem; - const char *dma_dev_name; int ret = 0; if (sst->pdata->resindex_dma_base == -1) @@ -285,7 +284,6 @@ int sst_dma_new(struct sst_dsp *sst) * is attached to the ADSP IP. */ switch (sst->pdata->dma_engine) { case SST_DMA_TYPE_DW: - dma_dev_name = "dw_dmac"; break; default: dev_err(sst->dev, "error: invalid DMA engine %d\n", From 9b6eda5797b182ab6460c25ec9a186a7fbf92a52 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 31 Oct 2017 10:27:47 +0000 Subject: [PATCH 35/59] platform/chrome: cros_ec_lpc: remove redundant pointer request commit d3b56c566d4ba8cae688baf3cca94425d57ea783 upstream. Pointer request is being assigned but never used, so remove it. Cleans up the clang warning: drivers/platform/chrome/cros_ec_lpc.c:68:2: warning: Value stored to 'request' is never read Signed-off-by: Colin Ian King Signed-off-by: Benson Leung Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/platform/chrome/cros_ec_lpc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index 1baf720faf69..87e9747d229a 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -54,7 +54,6 @@ static int ec_response_timed_out(void) static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec, struct cros_ec_command *msg) { - struct ec_host_request *request; struct ec_host_response response; u8 sum; int ret = 0; @@ -65,8 +64,6 @@ static int cros_ec_pkt_xfer_lpc(struct cros_ec_device *ec, /* Write buffer */ cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_PACKET, ret, ec->dout); - request = (struct ec_host_request *)ec->dout; - /* Here we go */ sum = EC_COMMAND_PROTOCOL_3; cros_ec_lpc_write_bytes(EC_LPC_ADDR_HOST_CMD, 1, &sum); From 96b086a7bfe575c3626cf2a9af301539c2623b9b Mon Sep 17 00:00:00 2001 From: Sodagudi Prasad Date: Tue, 6 Feb 2018 15:46:51 -0800 Subject: [PATCH 36/59] kbuild: clang: disable unused variable warnings only when constant commit 0a5f41767444cc3b4fc5573921ab914b4f78baaa upstream. Currently, GCC disables -Wunused-const-variable, but not -Wunused-variable, so warns unused variables if they are non-constant. While, Clang does not warn unused variables at all regardless of the const qualifier because -Wno-unused-const-variable is implied by the stronger option -Wno-unused-variable. Disable -Wunused-const-variable instead of -Wunused-variable so that GCC and Clang work in the same way. Signed-off-by: Prasad Sodagudi Signed-off-by: Masahiro Yamada Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4147a4bf6b98..a78653846850 100644 --- a/Makefile +++ b/Makefile @@ -706,7 +706,6 @@ KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) @@ -724,9 +723,9 @@ else # These warnings generated too much noise in a regular build. # Use make W=1 to enable them (see scripts/Makefile.extrawarn) KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) endif +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else From a6f81fcb2c3905c28641837dc823ed34617eb110 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Dec 2017 17:55:03 -0800 Subject: [PATCH 37/59] tcp: avoid integer overflows in tcp_rcv_space_adjust() commit 607065bad9931e72207b0cac365d7d4abc06bd99 upstream. When using large tcp_rmem[2] values (I did tests with 500 MB), I noticed overflows while computing rcvwin. Lets fix this before the following patch. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Wei Wang Acked-by: Neal Cardwell Signed-off-by: David S. Miller [Backport: sysctl_tcp_rmem is not Namespace-ify'd in older kernels] Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/tcp.h | 2 +- net/ipv4/tcp_input.c | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e8418fc77a43..fe322fa611e6 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -334,7 +334,7 @@ struct tcp_sock { /* Receiver queue space */ struct { - int space; + u32 space; u32 seq; u64 time; } rcvq_space; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ebbb54bcbcac..125b49c166a4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -591,8 +591,8 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 copied; int time; - int copied; tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); @@ -615,12 +615,13 @@ void tcp_rcv_space_adjust(struct sock *sk) if (sysctl_tcp_moderate_rcvbuf && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int rcvwin, rcvmem, rcvbuf; + int rcvmem, rcvbuf; + u64 rcvwin; /* minimal window to cope with packet losses, assuming * steady state. Add some cushion because of small variations. */ - rcvwin = (copied << 1) + 16 * tp->advmss; + rcvwin = ((u64)copied << 1) + 16 * tp->advmss; /* If rate increased by 25%, * assume slow start, rcvwin = 3 * copied @@ -640,7 +641,8 @@ void tcp_rcv_space_adjust(struct sock *sk) while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; - rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + do_div(rcvwin, tp->advmss); + rcvbuf = min_t(u64, rcvwin * rcvmem, sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { sk->sk_rcvbuf = rcvbuf; From d30819abd1c4768ba976821465c682e66e1605ee Mon Sep 17 00:00:00 2001 From: Michael Nosthoff Date: Fri, 9 Mar 2018 16:13:52 +0100 Subject: [PATCH 38/59] iio: ad7793: implement IIO_CHAN_INFO_SAMP_FREQ commit 490fba90a90eb7b741f57fefd2bcf2c1e11eb471 upstream. This commit is a follow-up to changes made to ad_sigma_delta.h in staging: iio: ad7192: implement IIO_CHAN_INFO_SAMP_FREQ which broke ad7793 as it was not altered to match those changes. This driver predates the availability of IIO_CHAN_INFO_SAMP_FREQ attribute wherein usage has some advantages like it can be accessed by in-kernel consumers as well as reduces the code size. Therefore, use IIO_CHAN_INFO_SAMP_FREQ to implement the sampling_frequency attribute instead of using IIO_DEV_ATTR_SAMP_FREQ() macro. Move code from the functions associated with IIO_DEV_ATTR_SAMP_FREQ() into respective read and write hooks with the mask set to IIO_CHAN_INFO_SAMP_FREQ. Fixes: a13e831fcaa7 ("staging: iio: ad7192: implement IIO_CHAN_INFO_SAMP_FREQ") Signed-off-by: Michael Nosthoff Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7793.c | 75 +++++++++++++--------------------------- 1 file changed, 24 insertions(+), 51 deletions(-) diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 47c3d7f32900..07246a6037e3 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -348,55 +348,6 @@ static const u16 ad7793_sample_freq_avail[16] = {0, 470, 242, 123, 62, 50, 39, static const u16 ad7797_sample_freq_avail[16] = {0, 0, 0, 123, 62, 50, 0, 33, 0, 17, 16, 12, 10, 8, 6, 4}; -static ssize_t ad7793_read_frequency(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct ad7793_state *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", - st->chip_info->sample_freq_avail[AD7793_MODE_RATE(st->mode)]); -} - -static ssize_t ad7793_write_frequency(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t len) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct ad7793_state *st = iio_priv(indio_dev); - long lval; - int i, ret; - - ret = kstrtol(buf, 10, &lval); - if (ret) - return ret; - - if (lval == 0) - return -EINVAL; - - for (i = 0; i < 16; i++) - if (lval == st->chip_info->sample_freq_avail[i]) - break; - if (i == 16) - return -EINVAL; - - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - st->mode &= ~AD7793_MODE_RATE(-1); - st->mode |= AD7793_MODE_RATE(i); - ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), st->mode); - iio_device_release_direct_mode(indio_dev); - - return len; -} - -static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, - ad7793_read_frequency, - ad7793_write_frequency); - static IIO_CONST_ATTR_SAMP_FREQ_AVAIL( "470 242 123 62 50 39 33 19 17 16 12 10 8 6 4"); @@ -424,7 +375,6 @@ static IIO_DEVICE_ATTR_NAMED(in_m_in_scale_available, ad7793_show_scale_available, NULL, 0); static struct attribute *ad7793_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_const_attr_sampling_frequency_available.dev_attr.attr, &iio_dev_attr_in_m_in_scale_available.dev_attr.attr, NULL @@ -435,7 +385,6 @@ static const struct attribute_group ad7793_attribute_group = { }; static struct attribute *ad7797_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_const_attr_sampling_frequency_available_ad7797.dev_attr.attr, NULL }; @@ -505,6 +454,10 @@ static int ad7793_read_raw(struct iio_dev *indio_dev, *val -= offset; } return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->chip_info + ->sample_freq_avail[AD7793_MODE_RATE(st->mode)]; + return IIO_VAL_INT; } return -EINVAL; } @@ -542,6 +495,26 @@ static int ad7793_write_raw(struct iio_dev *indio_dev, break; } break; + case IIO_CHAN_INFO_SAMP_FREQ: + if (!val) { + ret = -EINVAL; + break; + } + + for (i = 0; i < 16; i++) + if (val == st->chip_info->sample_freq_avail[i]) + break; + + if (i == 16) { + ret = -EINVAL; + break; + } + + st->mode &= ~AD7793_MODE_RATE(-1); + st->mode |= AD7793_MODE_RATE(i); + ad_sd_write_reg(&st->sd, AD7793_REG_MODE, sizeof(st->mode), + st->mode); + break; default: ret = -EINVAL; } From 30ab9366f763928176793469fb5c3e0ebe89aeb5 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 26 Mar 2018 14:27:51 -0700 Subject: [PATCH 39/59] iio:buffer: make length types match kfifo types commit c043ec1ca5baae63726aae32abbe003192bc6eec upstream. Currently, we use int for buffer length and bytes_per_datum. However, kfifo uses unsigned int for length and size_t for element size. We need to make sure these matches or we will have bugs related to overflow (in the range between INT_MAX and UINT_MAX for length, for example). In addition, set_bytes_per_datum uses size_t while bytes_per_datum is an int, which would cause bugs for large values of bytes_per_datum. Change buffer length to use unsigned int and bytes_per_datum to use size_t. Signed-off-by: Martin Kelly Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/buffer/industrialio-buffer-dma.c | 2 +- drivers/iio/buffer/kfifo_buf.c | 4 ++-- include/linux/iio/buffer_impl.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c index ff03324dee13..0a7289571b68 100644 --- a/drivers/iio/buffer/industrialio-buffer-dma.c +++ b/drivers/iio/buffer/industrialio-buffer-dma.c @@ -587,7 +587,7 @@ EXPORT_SYMBOL_GPL(iio_dma_buffer_set_bytes_per_datum); * Should be used as the set_length callback for iio_buffer_access_ops * struct for DMA buffers. */ -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length) +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length) { /* Avoid an invalid state */ if (length < 2) diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c index 047fe757ab97..ac622edf2486 100644 --- a/drivers/iio/buffer/kfifo_buf.c +++ b/drivers/iio/buffer/kfifo_buf.c @@ -22,7 +22,7 @@ struct iio_kfifo { #define iio_to_kfifo(r) container_of(r, struct iio_kfifo, buffer) static inline int __iio_allocate_kfifo(struct iio_kfifo *buf, - int bytes_per_datum, int length) + size_t bytes_per_datum, unsigned int length) { if ((length == 0) || (bytes_per_datum == 0)) return -EINVAL; @@ -67,7 +67,7 @@ static int iio_set_bytes_per_datum_kfifo(struct iio_buffer *r, size_t bpd) return 0; } -static int iio_set_length_kfifo(struct iio_buffer *r, int length) +static int iio_set_length_kfifo(struct iio_buffer *r, unsigned int length) { /* Avoid an invalid state */ if (length < 2) diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index b9e22b7e2f28..d1171db23742 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -53,7 +53,7 @@ struct iio_buffer_access_funcs { int (*request_update)(struct iio_buffer *buffer); int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd); - int (*set_length)(struct iio_buffer *buffer, int length); + int (*set_length)(struct iio_buffer *buffer, unsigned int length); int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); @@ -72,10 +72,10 @@ struct iio_buffer_access_funcs { */ struct iio_buffer { /** @length: Number of datums in buffer. */ - int length; + unsigned int length; /** @bytes_per_datum: Size of individual datum including timestamp. */ - int bytes_per_datum; + size_t bytes_per_datum; /** * @access: Buffer access functions associated with the From 838f25e3d9f2ad11e941649c56f8d205a80b01f2 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 26 Mar 2018 14:27:52 -0700 Subject: [PATCH 40/59] iio:kfifo_buf: check for uint overflow commit 3d13de4b027d5f6276c0f9d3a264f518747d83f2 upstream. Currently, the following causes a kernel OOPS in memcpy: echo 1073741825 > buffer/length echo 1 > buffer/enable Note that using 1073741824 instead of 1073741825 causes "write error: Cannot allocate memory" but no OOPS. This is because 1073741824 == 2^30 and 1073741825 == 2^30+1. Since kfifo rounds up to the nearest power of 2, it will actually call kmalloc with roundup_pow_of_two(length) * bytes_per_datum. Using length == 1073741825 and bytes_per_datum == 2, we get: kmalloc(roundup_pow_of_two(1073741825) * 2 or kmalloc(2147483648 * 2) or kmalloc(4294967296) or kmalloc(UINT_MAX + 1) so this overflows to 0, causing kmalloc to return ZERO_SIZE_PTR and subsequent memcpy to fail once the device is enabled. Fix this by checking for overflow prior to allocating a kfifo. With this check added, the above code returns -EINVAL when enabling the buffer, rather than causing an OOPS. Signed-off-by: Martin Kelly cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/buffer/kfifo_buf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c index ac622edf2486..70c302a93d7f 100644 --- a/drivers/iio/buffer/kfifo_buf.c +++ b/drivers/iio/buffer/kfifo_buf.c @@ -27,6 +27,13 @@ static inline int __iio_allocate_kfifo(struct iio_kfifo *buf, if ((length == 0) || (bytes_per_datum == 0)) return -EINVAL; + /* + * Make sure we don't overflow an unsigned int after kfifo rounds up to + * the next power of 2. + */ + if (roundup_pow_of_two(length) > UINT_MAX / bytes_per_datum) + return -EINVAL; + return __kfifo_alloc((struct __kfifo *)&buf->kf, length, bytes_per_datum, GFP_KERNEL); } From 6d67a723ef3788731d5884f0df6a71b745ab21ef Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 16 Apr 2018 09:54:03 +0300 Subject: [PATCH 41/59] iio: adc: select buffer for at91-sama5d2_adc commit 76974ef9d1bf397b7bb97892a3b3bc516a1fc2c2 upstream. We need to select the buffer code, otherwise we get build errors with undefined functions on the trigger and buffer, if we select just IIO and then AT91_SAMA5D2_ADC from menuconfig This adds a Kconfig 'select' statement like other ADC drivers have it already. Fixes: 5e1a1da0f8c9 ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Signed-off-by: Eugen Hristev Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 1d13bf03c758..369a2c632e46 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -158,6 +158,7 @@ config AT91_SAMA5D2_ADC tristate "Atmel AT91 SAMA5D2 ADC" depends on ARCH_AT91 || COMPILE_TEST depends on HAS_IOMEM + select IIO_BUFFER select IIO_TRIGGERED_BUFFER help Say yes here to build support for Atmel SAMA5D2 ADC which is From ed5bd13bec35574a9b2c9d978929d70a8c96aaaa Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Sun, 8 Apr 2018 10:30:03 +0200 Subject: [PATCH 42/59] MIPS: lantiq: gphy: Drop reboot/remove reset asserts commit 32795631e67e16141aa5e065c28ba03bf17abb90 upstream. While doing a global software reset, these bits are not cleared and let some bootloader fail to initialise the GPHYs. The bootloader don't expect the GPHYs in reset, as they aren't during power on. The asserts were a workaround for a wrong syscon-reboot mask. With a mask set which includes the GPHY resets, these resets aren't required any more. Fixes: 126534141b45 ("MIPS: lantiq: Add a GPHY driver which uses the RCU syscon-mfd") Signed-off-by: Mathias Kresin Acked-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Cc: John Crispin Cc: linux-mips@linux-mips.org Cc: # 4.14+ Patchwork: https://patchwork.linux-mips.org/patch/19003/ [jhogan@kernel.org: Fix build warnings] Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- drivers/soc/lantiq/gphy.c | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/drivers/soc/lantiq/gphy.c b/drivers/soc/lantiq/gphy.c index 8d8659463b3e..feeb17cebc25 100644 --- a/drivers/soc/lantiq/gphy.c +++ b/drivers/soc/lantiq/gphy.c @@ -30,7 +30,6 @@ struct xway_gphy_priv { struct clk *gphy_clk_gate; struct reset_control *gphy_reset; struct reset_control *gphy_reset2; - struct notifier_block gphy_reboot_nb; void __iomem *membase; char *fw_name; }; @@ -64,24 +63,6 @@ static const struct of_device_id xway_gphy_match[] = { }; MODULE_DEVICE_TABLE(of, xway_gphy_match); -static struct xway_gphy_priv *to_xway_gphy_priv(struct notifier_block *nb) -{ - return container_of(nb, struct xway_gphy_priv, gphy_reboot_nb); -} - -static int xway_gphy_reboot_notify(struct notifier_block *reboot_nb, - unsigned long code, void *unused) -{ - struct xway_gphy_priv *priv = to_xway_gphy_priv(reboot_nb); - - if (priv) { - reset_control_assert(priv->gphy_reset); - reset_control_assert(priv->gphy_reset2); - } - - return NOTIFY_DONE; -} - static int xway_gphy_load(struct device *dev, struct xway_gphy_priv *priv, dma_addr_t *dev_addr) { @@ -205,14 +186,6 @@ static int xway_gphy_probe(struct platform_device *pdev) reset_control_deassert(priv->gphy_reset); reset_control_deassert(priv->gphy_reset2); - /* assert the gphy reset because it can hang after a reboot: */ - priv->gphy_reboot_nb.notifier_call = xway_gphy_reboot_notify; - priv->gphy_reboot_nb.priority = -1; - - ret = register_reboot_notifier(&priv->gphy_reboot_nb); - if (ret) - dev_warn(dev, "Failed to register reboot notifier\n"); - platform_set_drvdata(pdev, priv); return ret; @@ -220,21 +193,12 @@ static int xway_gphy_probe(struct platform_device *pdev) static int xway_gphy_remove(struct platform_device *pdev) { - struct device *dev = &pdev->dev; struct xway_gphy_priv *priv = platform_get_drvdata(pdev); - int ret; - - reset_control_assert(priv->gphy_reset); - reset_control_assert(priv->gphy_reset2); iowrite32be(0, priv->membase); clk_disable_unprepare(priv->gphy_clk_gate); - ret = unregister_reboot_notifier(&priv->gphy_reboot_nb); - if (ret) - dev_warn(dev, "Failed to unregister reboot notifier\n"); - return 0; } From f7a36d7ac838321b949bcb66ecbe9ebcba6f5c64 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 16 May 2018 16:39:58 +0100 Subject: [PATCH 43/59] MIPS: ptrace: Fix PTRACE_PEEKUSR requests for 64-bit FGRs commit c7e814628df65f424fe197dde73bfc67e4a244d7 upstream. Use 64-bit accesses for 64-bit floating-point general registers with PTRACE_PEEKUSR, removing the truncation of their upper halves in the FR=1 mode, caused by commit bbd426f542cb ("MIPS: Simplify FP context access"), which inadvertently switched them to using 32-bit accesses. The PTRACE_POKEUSR side is fine as it's never been broken and continues using 64-bit accesses. Fixes: bbd426f542cb ("MIPS: Simplify FP context access") Signed-off-by: Maciej W. Rozycki Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 3.15+ Patchwork: https://patchwork.linux-mips.org/patch/19334/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/ptrace.c | 2 +- arch/mips/kernel/ptrace32.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 006105fb12fe..e058cd300713 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -809,7 +809,7 @@ long arch_ptrace(struct task_struct *child, long request, break; } #endif - tmp = get_fpr32(&fregs[addr - FPR_BASE], 0); + tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 4a157d3249ac..89026d33a07b 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -108,7 +108,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, addr & 1); break; } - tmp = get_fpr32(&fregs[addr - FPR_BASE], 0); + tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; From 8a6576219da8df6c72e6123f1313ad77d8c92288 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 15 May 2018 23:04:44 +0100 Subject: [PATCH 44/59] MIPS: prctl: Disallow FRE without FR with PR_SET_FP_MODE requests commit 28e4213dd331e944e7fca1954a946829162ed9d4 upstream. Having PR_FP_MODE_FRE (i.e. Config5.FRE) set without PR_FP_MODE_FR (i.e. Status.FR) is not supported as the lone purpose of Config5.FRE is to emulate Status.FR=0 handling on FPU hardware that has Status.FR=1 hardwired[1][2]. Also we do not handle this case elsewhere, and assume throughout our code that TIF_HYBRID_FPREGS and TIF_32BIT_FPREGS cannot be set both at once for a task, leading to inconsistent behaviour if this does happen. Return unsuccessfully then from prctl(2) PR_SET_FP_MODE calls requesting PR_FP_MODE_FRE to be set with PR_FP_MODE_FR clear. This corresponds to modes allowed by `mips_set_personality_fp'. References: [1] "MIPS Architecture For Programmers, Vol. III: MIPS32 / microMIPS32 Privileged Resource Architecture", Imagination Technologies, Document Number: MD00090, Revision 6.02, July 10, 2015, Table 9.69 "Config5 Register Field Descriptions", p. 262 [2] "MIPS Architecture For Programmers, Volume III: MIPS64 / microMIPS64 Privileged Resource Architecture", Imagination Technologies, Document Number: MD00091, Revision 6.03, December 22, 2015, Table 9.72 "Config5 Register Field Descriptions", p. 288 Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Signed-off-by: Maciej W. Rozycki Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 4.0+ Patchwork: https://patchwork.linux-mips.org/patch/19327/ Signed-off-by: James Hogan Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/process.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 2f2d176396aa..e1ddb94a6522 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -721,6 +721,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) if (value & ~known_bits) return -EOPNOTSUPP; + /* Setting FRE without FR is not supported. */ + if ((value & (PR_FP_MODE_FR | PR_FP_MODE_FRE)) == PR_FP_MODE_FRE) + return -EOPNOTSUPP; + /* Avoid inadvertently triggering emulation */ if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu && !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64)) From a70f19b29560be3311e07692b2ef13d2c93ea306 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 21 May 2018 11:17:29 -0700 Subject: [PATCH 45/59] scsi: scsi_transport_srp: Fix shost to rport translation commit c9ddf73476ff4fffb7a87bd5107a0705bf2cf64b upstream. Since an SRP remote port is attached as a child to shost->shost_gendev and as the only child, the translation from the shost pointer into an rport pointer must happen by looking up the shost child that is an rport. This patch fixes the following KASAN complaint: BUG: KASAN: slab-out-of-bounds in srp_timed_out+0x57/0x110 [scsi_transport_srp] Read of size 4 at addr ffff880035d3fcc0 by task kworker/1:0H/19 CPU: 1 PID: 19 Comm: kworker/1:0H Not tainted 4.16.0-rc3-dbg+ #1 Workqueue: kblockd blk_mq_timeout_work Call Trace: dump_stack+0x85/0xc7 print_address_description+0x65/0x270 kasan_report+0x231/0x350 srp_timed_out+0x57/0x110 [scsi_transport_srp] scsi_times_out+0xc7/0x3f0 [scsi_mod] blk_mq_terminate_expired+0xc2/0x140 bt_iter+0xbc/0xd0 blk_mq_queue_tag_busy_iter+0x1c7/0x350 blk_mq_timeout_work+0x325/0x3f0 process_one_work+0x441/0xa50 worker_thread+0x76/0x6c0 kthread+0x1b2/0x1d0 ret_from_fork+0x24/0x30 Fixes: e68ca75200fe ("scsi_transport_srp: Reduce failover time") Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Jason Gunthorpe Cc: Doug Ledford Cc: Laurence Oberman Cc: stable@vger.kernel.org Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_transport_srp.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 36f6190931bc..456ce9f19569 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -51,6 +51,8 @@ struct srp_internal { struct transport_container rport_attr_cont; }; +static int scsi_is_srp_rport(const struct device *dev); + #define to_srp_internal(tmpl) container_of(tmpl, struct srp_internal, t) #define dev_to_rport(d) container_of(d, struct srp_rport, dev) @@ -60,9 +62,24 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) return dev_to_shost(r->dev.parent); } +static int find_child_rport(struct device *dev, void *data) +{ + struct device **child = data; + + if (scsi_is_srp_rport(dev)) { + WARN_ON_ONCE(*child); + *child = dev; + } + return 0; +} + static inline struct srp_rport *shost_to_rport(struct Scsi_Host *shost) { - return transport_class_to_srp_rport(&shost->shost_gendev); + struct device *child = NULL; + + WARN_ON_ONCE(device_for_each_child(&shost->shost_gendev, &child, + find_child_rport) < 0); + return child ? dev_to_rport(child) : NULL; } /** @@ -600,7 +617,8 @@ enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) struct srp_rport *rport = shost_to_rport(shost); pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev)); - return rport->fast_io_fail_tmo < 0 && rport->dev_loss_tmo < 0 && + return rport && rport->fast_io_fail_tmo < 0 && + rport->dev_loss_tmo < 0 && i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; } From 1f8c4ed2dba50361ab2043ca67db7a529560b9d0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 24 May 2018 11:27:26 +0300 Subject: [PATCH 46/59] stm class: Use vmalloc for the master map commit b5e2ced9bf81393034072dd4d372f6b430bc1f0a upstream. Fengguang is running into a warning from the buddy allocator: > swapper/0: page allocation failure: order:9, mode:0x14040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null) > CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.17.0-rc1 #262 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 > Call Trace: ... > __kmalloc+0x14b/0x180: ____cache_alloc at mm/slab.c:3127 > stm_register_device+0xf3/0x5c0: stm_register_device at drivers/hwtracing/stm/core.c:695 ... Which is basically a result of the stm class trying to allocate ~512kB for the dummy_stm with its default parameters. There's no reason, however, for it not to be vmalloc()ed instead, which is what this patch does. Reported-by: Fengguang Wu Signed-off-by: Alexander Shishkin CC: stable@vger.kernel.org # v4.4+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index f129869e05a9..2de2e1aed9b1 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -682,7 +682,7 @@ static void stm_device_release(struct device *dev) { struct stm_device *stm = to_stm_device(dev); - kfree(stm); + vfree(stm); } int stm_register_device(struct device *parent, struct stm_data *stm_data, @@ -699,7 +699,7 @@ int stm_register_device(struct device *parent, struct stm_data *stm_data, return -EINVAL; nmasters = stm_data->sw_end - stm_data->sw_start + 1; - stm = kzalloc(sizeof(*stm) + nmasters * sizeof(void *), GFP_KERNEL); + stm = vzalloc(sizeof(*stm) + nmasters * sizeof(void *)); if (!stm) return -ENOMEM; @@ -752,7 +752,7 @@ err_device: /* matches device_initialize() above */ put_device(&stm->dev); err_free: - kfree(stm); + vfree(stm); return err; } From bdf1daba5f621c16bef76075fcf84f2ffdaf383d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 26 May 2018 08:49:24 +0200 Subject: [PATCH 47/59] hwtracing: stm: fix build error on some arches commit 806e30873f0e74d9d41b0ef761bd4d3e55c7d510 upstream. Commit b5e2ced9bf81 ("stm class: Use vmalloc for the master map") caused a build error on some arches as vmalloc.h was not explicitly included. Fix that by adding it to the list of includes. Fixes: b5e2ced9bf81 ("stm class: Use vmalloc for the master map") Reported-by: kbuild test robot Cc: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c index 2de2e1aed9b1..736862967e32 100644 --- a/drivers/hwtracing/stm/core.c +++ b/drivers/hwtracing/stm/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "stm.h" #include From 4a1b66bcec03fe411be108bc18cfc9bc1b392e38 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 27 May 2018 14:49:16 +0300 Subject: [PATCH 48/59] IB/core: Fix error code for invalid GID entry commit a840c93ca7582bb6c88df2345a33f979b7a67874 upstream. When a GID entry is invalid EAGAIN is returned. This is an incorrect error code, there is nothing that will make this GID entry valid again in bounded time. Some user space tools fail incorrectly if EAGAIN is returned here, and this represents a small ABI change from earlier kernels. The first patch in the Fixes list makes entries that were valid before to become invalid, allowing this code to trigger, while the second patch in the Fixes list introduced the wrong EAGAIN. Therefore revert the return result to EINVAL which matches the historical expectations of the ibv_query_gid_type() API of the libibverbs user space library. Cc: Fixes: 598ff6bae689 ("IB/core: Refactor GID modify code for RoCE") Fixes: 03db3a2d81e6 ("IB/core: Add RoCE GID table management") Reviewed-by: Daniel Jurgens Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 77515638c55c..896cfd9303b0 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -434,7 +434,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, return -EINVAL; if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) - return -EAGAIN; + return -EINVAL; memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); if (attr) { From a7027b7d698e1086532efc09270976dbd1ea6f9b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 1 Jun 2018 16:50:45 -0700 Subject: [PATCH 49/59] mm/huge_memory.c: __split_huge_page() use atomic ClearPageDirty() commit 2d077d4b59924acd1f5180c6fb73b57f4771fde6 upstream. Swapping load on huge=always tmpfs (with khugepaged tuned up to be very eager, but I'm not sure that is relevant) soon hung uninterruptibly, waiting for page lock in shmem_getpage_gfp()'s find_lock_entry(), most often when "cp -a" was trying to write to a smallish file. Debug showed that the page in question was not locked, and page->mapping NULL by now, but page->index consistent with having been in a huge page before. Reproduced in minutes on a 4.15 kernel, even with 4.17's 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()") added in; but took hours to reproduce on a 4.17 kernel (no idea why). The culprit proved to be the __ClearPageDirty() on tails beyond i_size in __split_huge_page(): the non-atomic __bitoperation may have been safe when 4.8's baa355fd3314 ("thp: file pages support for split_huge_page()") introduced it, but liable to erase PageWaiters after 4.10's 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit"). Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1805291841070.3197@eggly.anvils Fixes: 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Nicholas Piggin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e774898c91d5..8af604f3b370 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2388,7 +2388,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ if (head[i].index >= end) { - __ClearPageDirty(head + i); + ClearPageDirty(head + i); __delete_from_page_cache(head + i, NULL); if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) shmem_uncharge(head->mapping->host, 1); From 792be048cf9c8e733733980fc8e20e0a469e9291 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 28 May 2018 13:25:06 +0200 Subject: [PATCH 50/59] Revert "rt2800: use TXOP_BACKOFF for probe frames" commit 52a192362932f333a7ebafd581c4d9b81da2fec8 upstream. This reverts commit fb47ada8dc3c30c8e7b415da155742b49536c61e. In some situations when we set TXOP_BACKOFF, the probe frame is not sent at all. What it worse then sending probe frame as part of AMPDU and can degrade 11n performance to 11g rates. Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ralink/rt2x00/rt2x00queue.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c index a2c1ca5c76d1..e1660b92b20c 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c @@ -372,16 +372,15 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev, /* * Determine IFS values - * - Use TXOP_BACKOFF for probe and management frames except beacons + * - Use TXOP_BACKOFF for management frames except beacons * - Use TXOP_SIFS for fragment bursts * - Use TXOP_HTTXOP for everything else * * Note: rt2800 devices won't use CTS protection (if used) * for frames not transmitted with TXOP_HTTXOP */ - if ((ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_beacon(hdr->frame_control)) || - (tx_info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)) + if (ieee80211_is_mgmt(hdr->frame_control) && + !ieee80211_is_beacon(hdr->frame_control)) txdesc->u.ht.txop = TXOP_BACKOFF; else if (!(tx_info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT)) txdesc->u.ht.txop = TXOP_SIFS; From 5890358c60e5c508409af63a741c2cab72b07651 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 24 May 2018 11:27:27 +0300 Subject: [PATCH 51/59] intel_th: Use correct device when freeing buffers commit 0ed2424b911f3a058dfea01b78817abed767433d upstream. Commit d5c435df4a890 ("intel_th: msu: Use the real device in case of IOMMU domain allocation") changes dma buffer allocation to use the actual underlying device, but forgets to change the deallocation path, which leads to (if you've got CAP_SYS_RAWIO): > # echo 0,0 > /sys/bus/intel_th/devices/0-msc0/nr_pages > ------------[ cut here ]------------ > kernel BUG at ../linux/drivers/iommu/intel-iommu.c:3670! > CPU: 3 PID: 231 Comm: sh Not tainted 4.17.0-rc1+ #2729 > RIP: 0010:intel_unmap+0x11e/0x130 ... > Call Trace: > intel_free_coherent+0x3e/0x60 > msc_buffer_win_free+0x100/0x160 [intel_th_msu] This patch fixes the buffer deallocation code to use the correct device. Signed-off-by: Alexander Shishkin Fixes: d5c435df4a890 ("intel_th: msu: Use the real device in case of IOMMU domain allocation") Reported-by: Baofeng Tian CC: stable@vger.kernel.org # v4.14+ Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index dfb57eaa9f22..58ac786634dc 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -741,8 +741,8 @@ err_nomem: /* Reset the page to write-back before releasing */ set_memory_wb((unsigned long)win->block[i].bdesc, 1); #endif - dma_free_coherent(msc_dev(msc), size, win->block[i].bdesc, - win->block[i].addr); + dma_free_coherent(msc_dev(msc)->parent->parent, size, + win->block[i].bdesc, win->block[i].addr); } kfree(win); @@ -777,7 +777,7 @@ static void msc_buffer_win_free(struct msc *msc, struct msc_window *win) /* Reset the page to write-back before releasing */ set_memory_wb((unsigned long)win->block[i].bdesc, 1); #endif - dma_free_coherent(msc_dev(win->msc), PAGE_SIZE, + dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, win->block[i].bdesc, win->block[i].addr); } From ffedc7ade784610549592baef51895d96e747b7a Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Fri, 11 May 2018 12:51:42 -0700 Subject: [PATCH 52/59] drm/psr: Fix missed entry in PSR setup time table. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bdcc02cf1bb508fc700df7662f55058f651f2621 upstream. Entry corresponding to 220 us setup time was missing. I am not aware of any specific bug this fixes, but this could potentially result in enabling PSR on a panel with a higher setup time requirement than supported by the hardware. I verified the value is present in eDP spec versions 1.3, 1.4 and 1.4a. Fixes: 6608804b3d7f ("drm/dp: Add drm_dp_psr_setup_time()") Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Jose Roberto de Souza Cc: dri-devel@lists.freedesktop.org Reviewed-by: José Roberto de Souza Reviewed-by: Tarun Vyas Signed-off-by: Dhinakaran Pandiyan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180511195145.3829-3-dhinakaran.pandiyan@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 08af8d6b844b..493d8f56d14e 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1139,6 +1139,7 @@ int drm_dp_psr_setup_time(const u8 psr_cap[EDP_PSR_RECEIVER_CAP_SIZE]) static const u16 psr_setup_time_us[] = { PSR_SETUP_TIME(330), PSR_SETUP_TIME(275), + PSR_SETUP_TIME(220), PSR_SETUP_TIME(165), PSR_SETUP_TIME(110), PSR_SETUP_TIME(55), From 72571f26757ee81afed5716bad0b4a27cc1bf9f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 May 2018 08:48:40 +0100 Subject: [PATCH 53/59] drm/i915/lvds: Move acpi lid notification registration to registration phase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b9eb9c92899a509fe258d38dd6c214b1de69eee0 upstream. Delay registering ourselves with the acpi lid notification mechanism until we are registering the connectors after initialisation is complete. This prevents a possibility of trying to handle the lid notification before we are ready with the danger of chasing uninitialised function pointers. BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 IP: (null) PGD 0 P4D 0 Oops: 0010 [#1] PREEMPT SMP PTI Modules linked in: arc4(+) iwldvm(+) i915(+) mac80211 i2c_algo_bit coretemp mei_wdt iwlwifi drm_kms_helper kvm_intel wmi_bmof iTCO_wdt iTCO_vendor_support kvm snd_hda_codec_conexant snd_hda_codec_generic drm psmouse cfg80211 irqbypass input_leds pcspkr i2c_i801 snd_hda_intel snd_hda_codec thinkpad_acpi snd_hda_core mei_me lpc_ich snd_hwdep e1000e wmi nvram snd_pcm mei snd_timer shpchp ptp pps_core rfkill syscopyarea snd intel_agp sysfillrect intel_gtt soundcore sysimgblt battery led_class fb_sys_fops ac rtc_cmos agpgart evdev mac_hid acpi_cpufreq ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 fscrypto crypto_simd glue_helper cryptd aes_x86_64 xts algif_skcipher af_alg dm_crypt dm_mod sd_mod uas usb_storage serio_raw atkbd libps2 ahci libahci uhci_hcd libata scsi_mod ehci_pci ehci_hcd usbcore usb_common i8042 serio CPU: 1 PID: 378 Comm: systemd-logind Not tainted 4.16.8-1-ARCH #1 Hardware name: LENOVO 7454CTO/7454CTO, BIOS 6DET72WW (3.22 ) 10/25/2012 RIP: 0010: (null) RSP: 0018:ffffaf4580c33a18 EFLAGS: 00010287 RAX: 0000000000000000 RBX: ffff947533558000 RCX: 000000000000003e RDX: ffffffffc0aa80c0 RSI: ffffaf4580c33a3c RDI: ffff947534e4c000 RBP: ffff947533558338 R08: ffff947534598930 R09: ffffffffc0a928b1 R10: ffffd8f181d5fd40 R11: 0000000000000000 R12: ffffffffc0a928b1 R13: ffff947533558368 R14: ffffffffc0a928a9 R15: ffff947534e4c000 FS: 00007f3dc4ddb940(0000) GS:ffff947539280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000006e214000 CR4: 00000000000406e0 Call Trace: ? intel_modeset_setup_hw_state+0x385/0xf60 [i915] ? __intel_display_resume+0x1e/0xc0 [i915] ? intel_display_resume+0xcc/0x120 [i915] ? intel_lid_notify+0xbc/0xc0 [i915] ? notifier_call_chain+0x47/0x70 ? blocking_notifier_call_chain+0x3e/0x60 ? acpi_lid_notify_state+0x8f/0x1d0 ? acpi_lid_update_state+0x49/0x70 ? acpi_lid_input_open+0x60/0x90 ? input_open_device+0x5d/0xa0 ? evdev_open+0x1ba/0x1e0 [evdev] ? chrdev_open+0xa3/0x1b0 ? cdev_put.part.0+0x20/0x20 ? do_dentry_open+0x14c/0x300 ? path_openat+0x30c/0x1240 ? current_time+0x16/0x60 ? do_filp_open+0x93/0x100 ? __check_object_size+0xfb/0x180 ? do_sys_open+0x186/0x210 ? do_syscall_64+0x74/0x190 ? entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Code: Bad RIP value. RIP: (null) RSP: ffffaf4580c33a18 CR2: 0000000000000000 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106559 Fixes: c1c7af608920 ("drm/i915: force mode set at lid open time") Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180518074840.16194-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit e578a570dc7c20475774d1ff993825e3bd7a7011) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lvds.c | 43 +++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 240308f1b6dd..b898f010b189 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -565,6 +565,36 @@ exit: return NOTIFY_OK; } +static int +intel_lvds_connector_register(struct drm_connector *connector) +{ + struct intel_lvds_connector *lvds = to_lvds_connector(connector); + int ret; + + ret = intel_connector_register(connector); + if (ret) + return ret; + + lvds->lid_notifier.notifier_call = intel_lid_notify; + if (acpi_lid_notifier_register(&lvds->lid_notifier)) { + DRM_DEBUG_KMS("lid notifier registration failed\n"); + lvds->lid_notifier.notifier_call = NULL; + } + + return 0; +} + +static void +intel_lvds_connector_unregister(struct drm_connector *connector) +{ + struct intel_lvds_connector *lvds = to_lvds_connector(connector); + + if (lvds->lid_notifier.notifier_call) + acpi_lid_notifier_unregister(&lvds->lid_notifier); + + intel_connector_unregister(connector); +} + /** * intel_lvds_destroy - unregister and free LVDS structures * @connector: connector to free @@ -577,9 +607,6 @@ static void intel_lvds_destroy(struct drm_connector *connector) struct intel_lvds_connector *lvds_connector = to_lvds_connector(connector); - if (lvds_connector->lid_notifier.notifier_call) - acpi_lid_notifier_unregister(&lvds_connector->lid_notifier); - if (!IS_ERR_OR_NULL(lvds_connector->base.edid)) kfree(lvds_connector->base.edid); @@ -600,8 +627,8 @@ static const struct drm_connector_funcs intel_lvds_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .atomic_get_property = intel_digital_connector_atomic_get_property, .atomic_set_property = intel_digital_connector_atomic_set_property, - .late_register = intel_connector_register, - .early_unregister = intel_connector_unregister, + .late_register = intel_lvds_connector_register, + .early_unregister = intel_lvds_connector_unregister, .destroy = intel_lvds_destroy, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = intel_digital_connector_duplicate_state, @@ -1149,12 +1176,6 @@ out: lvds_encoder->a3_power = lvds & LVDS_A3_POWER_MASK; - lvds_connector->lid_notifier.notifier_call = intel_lid_notify; - if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) { - DRM_DEBUG_KMS("lid notifier registration failed\n"); - lvds_connector->lid_notifier.notifier_call = NULL; - } - return; failed: From c95c5f419e57a38b85e762e07b1e6a4bf9a6cb6e Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 9 Mar 2018 23:22:04 +0100 Subject: [PATCH 54/59] drm/i915: Disable LVDS on Radiant P845 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b3fb22733ae61050f8d10a1d6a8af176c5c5db1a upstream. Radiant P845 does not have LVDS, only VGA. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105468 Signed-off-by: Ondrej Zary Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180309222204.4771-1-linux@rainbow-software.org (cherry picked from commit 7f7105f99b75aca4f8c2a748ed6b82c7f8be3293) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b898f010b189..dae4e22a2c3f 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -845,6 +845,14 @@ static const struct dmi_system_id intel_no_lvds[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "D525MW"), }, }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "Radiant P845", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Radiant Systems Inc"), + DMI_MATCH(DMI_PRODUCT_NAME, "P845"), + }, + }, { } /* terminating entry */ }; From e14db4feb0351172ed7ab48040f9527b6b2ad62a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Feb 2018 15:27:20 +0100 Subject: [PATCH 55/59] powerpc/mm/slice: Remove intermediate bitmap copy commit 326691ad4f179e6edc7eb1271e618dd673e4736d upstream. bitmap_or() and bitmap_andnot() can work properly with dst identical to src1 or src2. There is no need of an intermediate result bitmap that is copied back to dst in a second step. Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/slice.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index a4f93699194b..b79897bb89e3 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -379,21 +379,17 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - dst->low_slices |= src->low_slices; - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); + bitmap_or(dst->high_slices, dst->high_slices, src->high_slices, + SLICE_NUM_HIGH); } static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - dst->low_slices &= ~src->low_slices; - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); + bitmap_andnot(dst->high_slices, dst->high_slices, src->high_slices, + SLICE_NUM_HIGH); } #ifdef CONFIG_PPC_64K_PAGES From b8b23e8926b3bf387f44f89083a16010eac744de Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Feb 2018 15:27:22 +0100 Subject: [PATCH 56/59] powerpc/mm/slice: create header files dedicated to slices commit a3286f05bc5a5bc7fc73a9783ec89de78fcd07f8 upstream. In preparation for the following patch which will enhance 'slices' for supporting PPC32 in order to fix an issue on hugepages on 8xx, this patch takes out of page*.h all bits related to 'slices' and put them into newly created slice.h header files. While common parts go into asm/slice.h, subarch specific parts go into respective books3s/64/slice.c and nohash/64/slice.c 'slices' Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/book3s/64/slice.h | 27 ++++++++++ arch/powerpc/include/asm/nohash/64/slice.h | 12 +++++ arch/powerpc/include/asm/page.h | 1 + arch/powerpc/include/asm/page_64.h | 59 ---------------------- arch/powerpc/include/asm/slice.h | 40 +++++++++++++++ 5 files changed, 80 insertions(+), 59 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/64/slice.h create mode 100644 arch/powerpc/include/asm/nohash/64/slice.h create mode 100644 arch/powerpc/include/asm/slice.h diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h new file mode 100644 index 000000000000..db0dedab65ee --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H +#define _ASM_POWERPC_BOOK3S_64_SLICE_H + +#ifdef CONFIG_PPC_MM_SLICES + +#define SLICE_LOW_SHIFT 28 +#define SLICE_LOW_TOP (0x100000000ul) +#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) +#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) + +#define SLICE_HIGH_SHIFT 40 +#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) +#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) + +#else /* CONFIG_PPC_MM_SLICES */ + +#define get_slice_psize(mm, addr) ((mm)->context.user_psize) +#define slice_set_user_psize(mm, psize) \ +do { \ + (mm)->context.user_psize = (psize); \ + (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ +} while (0) + +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h new file mode 100644 index 000000000000..ad0d6e3cc1c5 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/64/slice.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H +#define _ASM_POWERPC_NOHASH_64_SLICE_H + +#ifdef CONFIG_PPC_64K_PAGES +#define get_slice_psize(mm, addr) MMU_PAGE_64K +#else /* CONFIG_PPC_64K_PAGES */ +#define get_slice_psize(mm, addr) MMU_PAGE_4K +#endif /* !CONFIG_PPC_64K_PAGES */ +#define slice_set_user_psize(mm, psize) do { BUG(); } while (0) + +#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 8da5d4c1cab2..d5f1c41b7dba 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -344,5 +344,6 @@ typedef struct page *pgtable_t; #include #endif /* __ASSEMBLY__ */ +#include #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index c4d9654bd637..af04acdb873f 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -86,65 +86,6 @@ extern u64 ppc64_pft_size; #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_PPC_MM_SLICES - -#define SLICE_LOW_SHIFT 28 -#define SLICE_HIGH_SHIFT 40 - -#define SLICE_LOW_TOP (0x100000000ul) -#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) -#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) - -#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) -#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) - -#ifndef __ASSEMBLY__ -struct mm_struct; - -extern unsigned long slice_get_unmapped_area(unsigned long addr, - unsigned long len, - unsigned long flags, - unsigned int psize, - int topdown); - -extern unsigned int get_slice_psize(struct mm_struct *mm, - unsigned long addr); - -extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); -extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, - unsigned long len, unsigned int psize); - -#endif /* __ASSEMBLY__ */ -#else -#define slice_init() -#ifdef CONFIG_PPC_STD_MMU_64 -#define get_slice_psize(mm, addr) ((mm)->context.user_psize) -#define slice_set_user_psize(mm, psize) \ -do { \ - (mm)->context.user_psize = (psize); \ - (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ -} while (0) -#else /* CONFIG_PPC_STD_MMU_64 */ -#ifdef CONFIG_PPC_64K_PAGES -#define get_slice_psize(mm, addr) MMU_PAGE_64K -#else /* CONFIG_PPC_64K_PAGES */ -#define get_slice_psize(mm, addr) MMU_PAGE_4K -#endif /* !CONFIG_PPC_64K_PAGES */ -#define slice_set_user_psize(mm, psize) do { BUG(); } while(0) -#endif /* !CONFIG_PPC_STD_MMU_64 */ - -#define slice_set_range_psize(mm, start, len, psize) \ - slice_set_user_psize((mm), (psize)) -#endif /* CONFIG_PPC_MM_SLICES */ - -#ifdef CONFIG_HUGETLB_PAGE - -#ifdef CONFIG_PPC_MM_SLICES -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -#endif - -#endif /* !CONFIG_HUGETLB_PAGE */ - #define VM_DATA_DEFAULT_FLAGS \ (is_32bit_task() ? \ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h new file mode 100644 index 000000000000..17c5a5d8c418 --- /dev/null +++ b/arch/powerpc/include/asm/slice.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SLICE_H +#define _ASM_POWERPC_SLICE_H + +#ifdef CONFIG_PPC_BOOK3S_64 +#include +#else +#include +#endif + +#ifdef CONFIG_PPC_MM_SLICES + +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#ifndef __ASSEMBLY__ + +struct mm_struct; + +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, + unsigned long flags, unsigned int psize, + int topdown); + +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); + +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); +void slice_set_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long len, unsigned int psize); +#endif /* __ASSEMBLY__ */ + +#else /* CONFIG_PPC_MM_SLICES */ + +#define slice_set_range_psize(mm, start, len, psize) \ + slice_set_user_psize((mm), (psize)) +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_SLICE_H */ From 399e039634a4e77548aeef75e0f8ce210330f2b7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Feb 2018 15:27:24 +0100 Subject: [PATCH 57/59] powerpc/mm/slice: Enhance for supporting PPC32 commit db3a528db41caaa6dfd4c64e9f5efb1c81a80467 upstream. In preparation for the following patch which will fix an issue on the 8xx by re-using the 'slices', this patch enhances the 'slices' implementation to support 32 bits CPUs. On PPC32, the address space is limited to 4Gbytes, hence only the low slices will be used. The high slices use bitmaps. As bitmap functions are not prepared to handle bitmaps of size 0, this patch ensures that bitmap functions are called only when SLICE_NUM_HIGH is not nul. Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/nohash/32/slice.h | 18 +++++++++++ arch/powerpc/include/asm/slice.h | 4 ++- arch/powerpc/mm/slice.c | 37 +++++++++++++++++----- 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/include/asm/nohash/32/slice.h diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h new file mode 100644 index 000000000000..95d532e18092 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/slice.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H +#define _ASM_POWERPC_NOHASH_32_SLICE_H + +#ifdef CONFIG_PPC_MM_SLICES + +#define SLICE_LOW_SHIFT 28 +#define SLICE_LOW_TOP (0x100000000ull) +#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) +#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) + +#define SLICE_HIGH_SHIFT 0 +#define SLICE_NUM_HIGH 0ul +#define GET_HIGH_SLICE_INDEX(addr) (addr & 0) + +#endif /* CONFIG_PPC_MM_SLICES */ + +#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index 17c5a5d8c418..172711fadb1c 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,8 +4,10 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include -#else +#elif defined(CONFIG_PPC64) #include +#elif defined(CONFIG_PPC_MMU_NOHASH) +#include #endif #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b79897bb89e3..8baaa6c6f21c 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -73,10 +73,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len, unsigned long end = start + len - 1; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); if (start < SLICE_LOW_TOP) { - unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) - (1u << GET_LOW_SLICE_INDEX(start)); @@ -113,11 +115,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) unsigned long start = slice << SLICE_HIGH_SHIFT; unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); +#ifdef CONFIG_PPC64 /* Hack, so that each addresses is controlled by exactly one * of the high or low area bitmaps, the first high area starts * at 4GB, not 0 */ if (start == 0) start = SLICE_LOW_TOP; +#endif return !slice_area_is_free(mm, start, end - start); } @@ -127,7 +131,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret) unsigned long i; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); for (i = 0; i < SLICE_NUM_LOW; i++) if (!slice_low_has_vma(mm, i)) @@ -149,7 +154,8 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma u64 lpsizes; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); lpsizes = mm->context.low_slices_psize; for (i = 0; i < SLICE_NUM_LOW; i++) @@ -171,6 +177,10 @@ static int slice_check_fit(struct mm_struct *mm, DECLARE_BITMAP(result, SLICE_NUM_HIGH); unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit); + if (!SLICE_NUM_HIGH) + return (mask.low_slices & available.low_slices) == + mask.low_slices; + bitmap_and(result, mask.high_slices, available.high_slices, slice_count); @@ -180,6 +190,7 @@ static int slice_check_fit(struct mm_struct *mm, static void slice_flush_segments(void *parm) { +#ifdef CONFIG_PPC64 struct mm_struct *mm = parm; unsigned long flags; @@ -191,6 +202,7 @@ static void slice_flush_segments(void *parm) local_irq_save(flags); slb_flush_and_rebolt(); local_irq_restore(flags); +#endif } static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) @@ -380,6 +392,8 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) { dst->low_slices |= src->low_slices; + if (!SLICE_NUM_HIGH) + return; bitmap_or(dst->high_slices, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); } @@ -388,6 +402,8 @@ static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask * { dst->low_slices &= ~src->low_slices; + if (!SLICE_NUM_HIGH) + return; bitmap_andnot(dst->high_slices, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); } @@ -437,14 +453,17 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * init different masks */ mask.low_slices = 0; - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); /* silence stupid warning */; potential_mask.low_slices = 0; - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); compat_mask.low_slices = 0; - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); + + if (SLICE_NUM_HIGH) { + bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); + bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); + bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); + } /* Sanity checks */ BUG_ON(mm->task_size == 0); @@ -582,7 +601,9 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, convert: slice_andnot_mask(&mask, &good_mask); slice_andnot_mask(&mask, &compat_mask); - if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) { + if (mask.low_slices || + (SLICE_NUM_HIGH && + !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH))) { slice_convert(mm, mask, psize); if (psize > MMU_PAGE_BASE) on_each_cpu(slice_flush_segments, mm, 1); From 1dd9566d954230d5dccb48d070dc412bc9358ca8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Feb 2018 15:27:26 +0100 Subject: [PATCH 58/59] powerpc/mm/slice: Fix hugepage allocation at hint address on 8xx commit aa0ab02ba992eb956934b21373e0138211486ddd upstream. On the 8xx, the page size is set in the PMD entry and applies to all pages of the page table pointed by the said PMD entry. When an app has some regular pages allocated (e.g. see below) and tries to mmap() a huge page at a hint address covered by the same PMD entry, the kernel accepts the hint allthough the 8xx cannot handle different page sizes in the same PMD entry. 10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc 10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc mmap(0x10080000, 524288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000 This results the app remaining forever in do_page_fault()/hugetlb_fault() and when interrupting that app, we get the following warning: [162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4 [162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W 4.14.6 #85 [162980.035744] task: c67e2c00 task.stack: c668e000 [162980.035783] NIP: c000fe18 LR: c00e1eec CTR: c00f90c0 [162980.035830] REGS: c668fc20 TRAP: 0700 Tainted: G W (4.14.6) [162980.035854] MSR: 00029032 CR: 24044224 XER: 20000000 [162980.036003] [162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000 [162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc [162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48 [162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000 [162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4 [162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150 [162980.036861] Call Trace: [162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable) [162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150 [162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4 [162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8 [162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c [162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc [162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614 [162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244 [162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88 [162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4 [162980.037781] Instruction dump: [162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94 [162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094 [162980.038216] ---[ end trace c0ceeca8e7a5800a ]--- [162980.038754] BUG: non-zero nr_ptes on freeing mm: 1 [162985.363322] BUG: non-zero nr_ptes on freeing mm: -1 In order to fix this, this patch uses the address space "slices" implemented for BOOK3S/64 and enhanced to support PPC32 by the preceding patch. This patch modifies the context.id on the 8xx to be in the range [1:16] instead of [0:15] in order to identify context.id == 0 as not initialised contexts as done on BOOK3S This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is selected for the 8xx Alltough we could in theory have as many slices as PMD entries, the current slices implementation limits the number of low slices to 16. This limitation is not preventing us to fix the initial issue allthough it is suboptimal. It will be cured in a subsequent patch. Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages") Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/mmu-8xx.h | 6 ++++++ arch/powerpc/kernel/setup-common.c | 2 ++ arch/powerpc/mm/8xx_mmu.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 2 ++ arch/powerpc/mm/mmu_context_nohash.c | 18 ++++++++++++++++-- arch/powerpc/platforms/Kconfig.cputype | 1 + 6 files changed, 28 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 5bb3dbede41a..1325e5b5f680 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -169,6 +169,12 @@ typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; +#ifdef CONFIG_PPC_MM_SLICES + u16 user_psize; /* page size index */ + u64 low_slices_psize; /* page size encodings */ + unsigned char high_slices_psize[0]; + unsigned long addr_limit; +#endif } mm_context_t; #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b4fcb54b9686..008447664643 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -915,6 +915,8 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PPC_MM_SLICES #ifdef CONFIG_PPC64 init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64; +#elif defined(CONFIG_PPC_8xx) + init_mm.context.addr_limit = DEFAULT_MAP_WINDOW; #else #error "context.addr_limit not initialized." #endif diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index f29212e40f40..0be77709446c 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd) mtspr(SPRN_M_TW, __pa(pgd) - offset); /* Update context */ - mtspr(SPRN_M_CASID, id); + mtspr(SPRN_M_CASID, id - 1); /* sync */ mb(); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1571a498a33f..4c9e5f9c7a44 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -552,9 +552,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); +#ifdef CONFIG_PPC_RADIX_MMU if (radix_enabled()) return radix__hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); +#endif return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 4554d6527682..e2b28b3a512e 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -331,6 +331,20 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) { pr_hard("initing context for mm @%p\n", mm); +#ifdef CONFIG_PPC_MM_SLICES + if (!mm->context.addr_limit) + mm->context.addr_limit = DEFAULT_MAP_WINDOW; + + /* + * We have MMU_NO_CONTEXT set to be ~0. Hence check + * explicitly against context.id == 0. This ensures that we properly + * initialize context slice details for newly allocated mm's (which will + * have id == 0) and don't alter context slice inherited via fork (which + * will have id != 0). + */ + if (mm->context.id == 0) + slice_set_user_psize(mm, mmu_virtual_psize); +#endif mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; return 0; @@ -428,8 +442,8 @@ void __init mmu_context_init(void) * -- BenH */ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { - first_context = 0; - last_context = 15; + first_context = 1; + last_context = 16; no_selective_tlbil = true; } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a78f255111f2..3ce376b42330 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -325,6 +325,7 @@ config PPC_BOOK3E_MMU config PPC_MM_SLICES bool default y if PPC_STD_MMU_64 + default y if PPC_8xx && HUGETLB_PAGE default n config PPC_HAVE_PMU_SUPPORT From 2c6025ebc7fd8e0a8ca785d778dc6ae25225744b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 5 Jun 2018 11:42:00 +0200 Subject: [PATCH 59/59] Linux 4.14.48 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a78653846850..7a246f1ce44e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 47 +SUBLEVEL = 48 EXTRAVERSION = NAME = Petit Gorille