msm-4.14: Revert some Sultan changes

This reverts commits:
- 2d26472b398e2850755e0f447f11762948876758 [memlat: Read perf counters in parallel and reduce system jitter]
- 91164894044c9682faf3da668c06cd588975dc15 [soc: qcom: watchdog_v2: Optimize IPI pings to reduce system jitter]
- f060136685304ae86aad3da35b1a098642d4e8de [smp: Use migrate disable/enable in smp_call_function_single_async()]
- d1155d942a10a5fe166941db835aad60f8e7603d [smp: Make generic_exec_single() public]

Change-Id: I8723ded1aaa7241ace7c56613a7abf6c46b7808d
Signed-off-by: Richard Raya <rdxzv.dev@gmail.com>
Author: Richard Raya <rdxzv.dev@gmail.com>
Date:   2024-10-08 22:34:01 -03:00
Commit: 4b1ae9fdf5 (parent cf03101a7e)

6 changed files with 58 additions and 197 deletions
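
For context on the reverted commits: they made generic_exec_single() public and used it from the memlat and watchdog drivers to fire cross-CPU work asynchronously instead of going through the stock synchronous helper. A minimal sketch of the two calling styles, kernel context assumed; remote_work, ping_csd, ping_sync and ping_async are hypothetical names, not from this tree:

#include <linux/percpu.h>
#include <linux/smp.h>

static void remote_work(void *info)
{
        /* Runs in IPI context on the target CPU. */
}

static DEFINE_PER_CPU(call_single_data_t, ping_csd);

static void ping_sync(int cpu, void *data)
{
        /* Synchronous: returns only after 'cpu' has executed remote_work(). */
        smp_call_function_single(cpu, remote_work, data, 1);
}

static int ping_async(int cpu, void *data)
{
        call_single_data_t *csd = &per_cpu(ping_csd, cpu);

        csd->func = remote_work;
        csd->info = data;
        /* Asynchronous: queues the csd and returns immediately; the csd must
         * not be reused until remote_work() has run on the target CPU. */
        return smp_call_function_single_async(cpu, csd);
}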

@@ -58,7 +58,6 @@ struct cpu_pmu_stats {
struct cpu_grp_info {
cpumask_t cpus;
unsigned long any_cpu_ev_mask;
unsigned int event_ids[NUM_EVENTS];
struct cpu_pmu_stats *cpustats;
struct memlat_hwmon hw;
@@ -68,13 +67,6 @@ struct memlat_mon_spec {
bool is_compute;
};
struct ipi_data {
unsigned long cnts[NR_CPUS][NUM_EVENTS];
struct task_struct *waiter_task;
struct cpu_grp_info *cpu_grp;
atomic_t cpus_left;
};
#define to_cpustats(cpu_grp, cpu) \
(&cpu_grp->cpustats[cpu - cpumask_first(&cpu_grp->cpus)])
#define to_devstats(cpu_grp, cpu) \
@@ -101,84 +93,32 @@ static unsigned long compute_freq(struct cpu_pmu_stats *cpustats,
}
#define MAX_COUNT_LIM 0xFFFFFFFFFFFFFFFF
static unsigned long read_event(struct cpu_pmu_stats *cpustats, int event_id)
static inline unsigned long read_event(struct event_data *event)
{
struct event_data *event = &cpustats->events[event_id];
unsigned long ev_count;
u64 total;
u64 total, enabled, running;
if (!event->pevent || perf_event_read_local(event->pevent, &total))
if (!event->pevent)
return 0;
total = perf_event_read_value(event->pevent, &enabled, &running);
ev_count = total - event->prev_count;
event->prev_count = total;
return ev_count;
}
static void read_perf_counters(struct ipi_data *ipd, int cpu)
static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp)
{
struct cpu_grp_info *cpu_grp = ipd->cpu_grp;
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
int ev;
for (ev = 0; ev < NUM_EVENTS; ev++) {
if (!(cpu_grp->any_cpu_ev_mask & BIT(ev)))
ipd->cnts[cpu][ev] = read_event(cpustats, ev);
}
}
static void read_evs_ipi(void *info)
{
int cpu = raw_smp_processor_id();
struct ipi_data *ipd = info;
struct task_struct *waiter;
read_perf_counters(ipd, cpu);
/*
* Wake up the waiter task if we're the final CPU. The ipi_data pointer
* isn't safe to dereference once cpus_left reaches zero, so the waiter
* task_struct pointer must be cached before that. Also defend against
* the extremely unlikely possibility that the waiter task will have
* exited by the time wake_up_process() is reached.
*/
waiter = ipd->waiter_task;
get_task_struct(waiter);
if (atomic_fetch_andnot(BIT(cpu), &ipd->cpus_left) == BIT(cpu) &&
waiter->state != TASK_RUNNING)
wake_up_process(waiter);
put_task_struct(waiter);
}
static void read_any_cpu_events(struct ipi_data *ipd, unsigned long cpus)
{
struct cpu_grp_info *cpu_grp = ipd->cpu_grp;
int cpu, ev;
if (!cpu_grp->any_cpu_ev_mask)
return;
for_each_cpu(cpu, to_cpumask(&cpus)) {
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
for_each_set_bit(ev, &cpu_grp->any_cpu_ev_mask, NUM_EVENTS)
ipd->cnts[cpu][ev] = read_event(cpustats, ev);
}
}
static void compute_perf_counters(struct ipi_data *ipd, int cpu)
{
struct cpu_grp_info *cpu_grp = ipd->cpu_grp;
struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu);
struct dev_stats *devstats = to_devstats(cpu_grp, cpu);
unsigned long cyc_cnt, stall_cnt;
devstats->inst_count = ipd->cnts[cpu][INST_IDX];
devstats->mem_count = ipd->cnts[cpu][CM_IDX];
cyc_cnt = ipd->cnts[cpu][CYC_IDX];
devstats->inst_count = read_event(&cpustats->events[INST_IDX]);
devstats->mem_count = read_event(&cpustats->events[CM_IDX]);
cyc_cnt = read_event(&cpustats->events[CYC_IDX]);
devstats->freq = compute_freq(cpustats, cyc_cnt);
if (cpustats->events[STALL_CYC_IDX].pevent) {
stall_cnt = ipd->cnts[cpu][STALL_CYC_IDX];
stall_cnt = read_event(&cpustats->events[STALL_CYC_IDX]);
stall_cnt = min(stall_cnt, cyc_cnt);
devstats->stall_pct = mult_frac(100, stall_cnt, cyc_cnt);
} else {
@@ -188,69 +128,19 @@ static void compute_perf_counters(struct ipi_data *ipd, int cpu)
static unsigned long get_cnt(struct memlat_hwmon *hw)
{
int cpu;
struct cpu_grp_info *cpu_grp = to_cpu_grp(hw);
unsigned long cpus_read_mask, tmp_mask;
call_single_data_t csd[NR_CPUS];
struct ipi_data ipd;
int cpu, this_cpu;
ipd.waiter_task = current;
ipd.cpu_grp = cpu_grp;
/* Dispatch asynchronous IPIs to each CPU to read the perf events */
cpus_read_lock();
migrate_disable();
this_cpu = raw_smp_processor_id();
cpus_read_mask = *cpumask_bits(&cpu_grp->cpus);
tmp_mask = cpus_read_mask & ~BIT(this_cpu);
ipd.cpus_left = (atomic_t)ATOMIC_INIT(tmp_mask);
for_each_cpu(cpu, to_cpumask(&tmp_mask)) {
/*
* Some SCM calls take very long (20+ ms), so the IPI could lag
* on the CPU running the SCM call. Skip offline CPUs too.
*/
csd[cpu].flags = 0;
if (under_scm_call(cpu) ||
generic_exec_single(cpu, &csd[cpu], read_evs_ipi, &ipd))
cpus_read_mask &= ~BIT(cpu);
}
cpus_read_unlock();
/* Read this CPU's events while the IPIs run */
if (cpus_read_mask & BIT(this_cpu))
read_perf_counters(&ipd, this_cpu);
migrate_enable();
/* Bail out if there weren't any CPUs available */
if (!cpus_read_mask)
return 0;
/* Read any any-CPU events while the IPIs run */
read_any_cpu_events(&ipd, cpus_read_mask);
/* Clear out CPUs which were skipped */
atomic_andnot(cpus_read_mask ^ tmp_mask, &ipd.cpus_left);
/*
* Wait until all the IPIs are done reading their events, and compute
* each finished CPU's results while waiting since some CPUs may finish
* reading their events faster than others.
* Some of SCM call is very heavy(+20ms) so perf IPI could
* be stuck on the CPU which contributes long latency.
*/
for (tmp_mask = cpus_read_mask;;) {
unsigned long cpus_done, cpus_left;
set_current_state(TASK_UNINTERRUPTIBLE);
cpus_left = (unsigned int)atomic_read(&ipd.cpus_left);
if ((cpus_done = cpus_left ^ tmp_mask)) {
for_each_cpu(cpu, to_cpumask(&cpus_done))
compute_perf_counters(&ipd, cpu);
if (!cpus_left)
break;
tmp_mask = cpus_left;
} else {
schedule();
}
if (under_scm_call()) {
return 0;
}
__set_current_state(TASK_RUNNING);
for_each_cpu(cpu, &cpu_grp->cpus)
read_perf_counters(cpu, cpu_grp);
return 0;
}
@@ -327,8 +217,6 @@ static int set_events(struct cpu_grp_info *cpu_grp, int cpu)
goto err_out;
cpustats->events[i].pevent = pevent;
perf_event_enable(pevent);
if (cpumask_equal(&pevent->readable_on_cpus, &CPU_MASK_ALL))
cpu_grp->any_cpu_ev_mask |= BIT(i);
}
kfree(attr);
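
The restored memlat read path drops the IPI plumbing: each event keeps a prev_count and the governor computes deltas with perf_event_read_value(), which can fetch the counter from any CPU (at the cost of a possible cross-CPU read per event), whereas the reverted code used perf_event_read_local() and therefore had to run on the event's own CPU. A minimal sketch of that delta bookkeeping, with counter_state and counter_delta as illustrative stand-ins trimmed to the fields used here:

#include <linux/perf_event.h>

/* Illustrative only: mirrors the delta bookkeeping the revert restores. */
struct counter_state {
        struct perf_event *pevent;
        unsigned long prev_count;
};

static unsigned long counter_delta(struct counter_state *cs)
{
        u64 total, enabled, running;
        unsigned long delta;

        if (!cs->pevent)
                return 0;

        /* May read remotely; no need to be on the event's target CPU. */
        total = perf_event_read_value(cs->pevent, &enabled, &running);
        delta = total - cs->prev_count;
        cs->prev_count = total;
        return delta;
}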

@@ -36,7 +36,7 @@
#define SCM_EBUSY -55
#define SCM_V2_EBUSY -12
static DEFINE_PER_CPU(atomic_t, scm_call_count);
static atomic_t scm_call_count = ATOMIC_INIT(0);
static DEFINE_MUTEX(scm_lock);
/*
@@ -433,12 +433,11 @@ static int ___scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
static int __scm_call_armv8_64(u64 x0, u64 x1, u64 x2, u64 x3, u64 x4, u64 x5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;
atomic_inc(cnt);
atomic_inc(&scm_call_count);
ret = ___scm_call_armv8_64(x0, x1, x2, x3, x4, x5, ret1, ret2, ret3);
atomic_dec(cnt);
atomic_dec(&scm_call_count);
return ret;
}
@@ -496,12 +495,11 @@ static int ___scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;
atomic_inc(cnt);
atomic_inc(&scm_call_count);
ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
atomic_dec(cnt);
atomic_dec(&scm_call_count);
return ret;
}
@@ -559,12 +557,11 @@ static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
static int __scm_call_armv8_32(u32 w0, u32 w1, u32 w2, u32 w3, u32 w4, u32 w5,
u64 *ret1, u64 *ret2, u64 *ret3)
{
atomic_t *cnt = per_cpu_ptr(&scm_call_count, raw_smp_processor_id());
int ret;
atomic_inc(cnt);
atomic_inc(&scm_call_count);
ret = ___scm_call_armv8_32(w0, w1, w2, w3, w4, w5, ret1, ret2, ret3);
atomic_dec(cnt);
atomic_dec(&scm_call_count);
return ret;
}
@@ -1355,7 +1352,7 @@ inline int scm_enable_mem_protection(void)
#endif
EXPORT_SYMBOL(scm_enable_mem_protection);
bool under_scm_call(int cpu)
bool under_scm_call(void)
{
return atomic_read(per_cpu_ptr(&scm_call_count, cpu));
return atomic_read(&scm_call_count);
}
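
The scm hunks collapse the per-CPU call counters back into one global atomic, and under_scm_call() loses its cpu argument accordingly: the question becomes "is any SCM call in flight?" rather than "is this particular CPU stuck in one?". A minimal sketch of the two bookkeeping styles; the names here are illustrative, not from this tree:

#include <linux/atomic.h>
#include <linux/percpu.h>
#include <linux/types.h>

/* Restored style: one global counter, answers "is any SCM call in flight?" */
static atomic_t scm_calls_in_flight = ATOMIC_INIT(0);

static bool any_scm_call_pending(void)
{
        return atomic_read(&scm_calls_in_flight) != 0;
}

/* Reverted style, for contrast: per-CPU counters let a caller ask about one
 * CPU, which the parallel memlat path used to skip CPUs busy in long calls. */
static DEFINE_PER_CPU(atomic_t, scm_calls_on_cpu);

static bool scm_call_pending_on(int cpu)
{
        return atomic_read(per_cpu_ptr(&scm_calls_on_cpu, cpu)) != 0;
}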

@@ -80,8 +80,7 @@ struct msm_watchdog_data {
unsigned int min_slack_ticks;
unsigned long long min_slack_ns;
void *scm_regsave;
atomic_t alive_mask;
atomic_t pinged_mask;
cpumask_t alive_mask;
struct mutex disable_lock;
bool irq_ppi;
struct msm_watchdog_data __percpu **wdog_cpu_dd;
@@ -98,6 +97,8 @@ struct msm_watchdog_data {
bool user_pet_complete;
unsigned long long timer_fired;
unsigned long long thread_start;
unsigned long long ping_start[NR_CPUS];
unsigned long long ping_end[NR_CPUS];
unsigned int cpu_scandump_sizes[NR_CPUS];
/* When single buffer is used to collect Scandump */
@@ -142,8 +143,8 @@ static void dump_cpu_alive_mask(struct msm_watchdog_data *wdog_dd)
{
static char alive_mask_buf[MASK_SIZE];
scnprintf(alive_mask_buf, MASK_SIZE, "%x",
atomic_read(&wdog_dd->alive_mask));
scnprintf(alive_mask_buf, MASK_SIZE, "%*pb1", cpumask_pr_args(
&wdog_dd->alive_mask));
dev_info(wdog_dd->dev, "cpu alive mask from last pet %s\n",
alive_mask_buf);
}
@@ -381,59 +382,33 @@ static void pet_watchdog(struct msm_watchdog_data *wdog_dd)
static void keep_alive_response(void *info)
{
struct msm_watchdog_data *wdog_dd = wdog_data;
unsigned int this_cpu_bit = (unsigned long)info >> 32;
unsigned int final_alive_mask = (unsigned int)(long)info;
unsigned int old;
int cpu = smp_processor_id();
struct msm_watchdog_data *wdog_dd = (struct msm_watchdog_data *)info;
/* Wake up the watchdog task if we're the final pinged CPU */
old = atomic_fetch_or_relaxed(this_cpu_bit, &wdog_data->alive_mask);
if (old == (final_alive_mask & ~this_cpu_bit))
wake_up_process(wdog_dd->watchdog_task);
cpumask_set_cpu(cpu, &wdog_dd->alive_mask);
wdog_dd->ping_end[cpu] = sched_clock();
/* Make sure alive mask is cleared and set in order */
smp_mb();
}
static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
/*
* If this function does not return, it implies one of the
* other cpu's is not responsive.
*/
static void ping_other_cpus(struct msm_watchdog_data *wdog_dd)
{
unsigned long online_mask, ping_mask = 0;
unsigned int final_alive_mask;
int cpu, this_cpu;
int cpu;
/*
* Ping all CPUs other than the current one asynchronously so that we
* don't spend a lot of time spinning on the current CPU with IRQs
* disabled (which is what smp_call_function_single() does in
* synchronous mode).
*/
preempt_disable();
this_cpu = raw_smp_processor_id();
atomic_set(&wdog_dd->alive_mask, BIT(this_cpu));
online_mask = *cpumask_bits(cpu_online_mask) & ~BIT(this_cpu);
for_each_cpu(cpu, to_cpumask(&online_mask)) {
if (!cpu_idle_pc_state[cpu] && !cpu_isolated(cpu))
ping_mask |= BIT(cpu);
cpumask_clear(&wdog_dd->alive_mask);
/* Make sure alive mask is cleared and set in order */
smp_mb();
for_each_cpu(cpu, cpu_online_mask) {
if (!cpu_idle_pc_state[cpu] && !cpu_isolated(cpu)) {
wdog_dd->ping_start[cpu] = sched_clock();
smp_call_function_single(cpu, keep_alive_response,
wdog_dd, 1);
}
}
final_alive_mask = ping_mask | BIT(this_cpu);
for_each_cpu(cpu, to_cpumask(&ping_mask)) {
generic_exec_single(cpu, per_cpu_ptr(&csd_data, cpu),
keep_alive_response,
(void *)(BIT(cpu + 32) | final_alive_mask));
}
preempt_enable();
atomic_set(&wdog_dd->pinged_mask, final_alive_mask);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (atomic_read(&wdog_dd->alive_mask) == final_alive_mask)
break;
schedule();
}
__set_current_state(TASK_RUNNING);
}
static void pet_task_wakeup(unsigned long data)
@@ -451,7 +426,7 @@ static __ref int watchdog_kthread(void *arg)
(struct msm_watchdog_data *)arg;
unsigned long delay_time = 0;
struct sched_param param = {.sched_priority = MAX_RT_PRIO-1};
int ret;
int ret, cpu;
sched_setscheduler(current, SCHED_FIFO, &param);
while (!kthread_should_stop()) {
@@ -461,6 +436,9 @@ static __ref int watchdog_kthread(void *arg)
} while (ret != 0);
wdog_dd->thread_start = sched_clock();
for_each_cpu(cpu, cpu_present_mask)
wdog_dd->ping_start[cpu] = wdog_dd->ping_end[cpu] = 0;
if (wdog_dd->do_ipi_ping)
ping_other_cpus(wdog_dd);
@@ -928,6 +906,7 @@ static int msm_watchdog_probe(struct platform_device *pdev)
wdog_data = wdog_dd;
wdog_dd->dev = &pdev->dev;
platform_set_drvdata(pdev, wdog_dd);
cpumask_clear(&wdog_dd->alive_mask);
wdog_dd->watchdog_task = kthread_create(watchdog_kthread, wdog_dd,
"msm_watchdog");
if (IS_ERR(wdog_dd->watchdog_task)) {
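
With the revert, ping_other_cpus() goes back to pinging each online, non-isolated CPU synchronously and timestamping the round trip, instead of fanning out async csd requests and sleeping until the last responder sets its bit. A minimal sketch of that synchronous ping-and-timestamp pattern; ping_sent, ping_acked, ack_ping and ping_all_cpus are hypothetical names, kernel context assumed:

#include <linux/sched/clock.h>
#include <linux/smp.h>

static unsigned long long ping_sent[NR_CPUS], ping_acked[NR_CPUS];

static void ack_ping(void *info)
{
        /* Runs on the target CPU; record when the IPI was actually handled. */
        ping_acked[smp_processor_id()] = sched_clock();
}

static void ping_all_cpus(void)
{
        int cpu;

        for_each_online_cpu(cpu) {
                ping_sent[cpu] = sched_clock();
                /* wait=1: do not move on until this CPU has answered. */
                smp_call_function_single(cpu, ack_ping, NULL, 1);
        }
}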

@@ -97,8 +97,6 @@ extern void smp_cpus_done(unsigned int max_cpus);
/*
* Call a function on all other processors
*/
int generic_exec_single(int cpu, struct __call_single_data *csd,
smp_call_func_t func, void *info);
int smp_call_function(smp_call_func_t func, void *info, int wait);
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait);

@@ -124,7 +124,7 @@ struct scm_hdcp_req {
};
extern struct mutex scm_lmh_lock;
extern bool under_scm_call(int cpu);
extern bool under_scm_call(void);
#else
static inline int scm_call2(u32 cmd_id, struct scm_desc *desc)
@@ -186,7 +186,7 @@ static inline int scm_enable_mem_protection(void)
{
return 0;
}
extern bool under_scm_call(int cpu)
extern bool under_scm_call(void)
{
return false;
}

@@ -130,8 +130,7 @@ static __always_inline void csd_lock(struct __call_single_data *csd)
static __always_inline void csd_unlock(struct __call_single_data *csd)
{
if (!(csd->flags & CSD_FLAG_LOCK))
return;
WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
/*
* ensure we're all done before releasing data:
@@ -148,7 +147,7 @@ extern void send_call_function_single_ipi(int cpu);
* for execution on the given CPU. data must already have
* ->func, ->info, and ->flags set.
*/
int generic_exec_single(int cpu, struct __call_single_data *csd,
static int generic_exec_single(int cpu, struct __call_single_data *csd,
smp_call_func_t func, void *info)
{
if (cpu == smp_processor_id()) {
@@ -371,7 +370,7 @@ int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
{
int err = 0;
migrate_disable();
preempt_disable();
/* We could deadlock if we have to wait here with interrupts disabled! */
if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
@@ -381,7 +380,7 @@ int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
smp_wmb();
err = generic_exec_single(cpu, csd, csd->func, csd->info);
migrate_enable();
preempt_enable();
return err;
}
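
With generic_exec_single() static again, out-of-tree callers go back through smp_call_function_single_async(), whose contract the comment above generic_exec_single() spells out: the caller owns the csd, sets ->func and ->info up front, and must not resubmit it until the previous invocation has finished on the target CPU (the WARN_ON_ONCE above catches a csd that is still locked). A minimal single-use sketch of that contract; note_remote_cpu, note_csd and queue_note are hypothetical names, not from this tree:

#include <linux/smp.h>

static int remote_cpu_seen;

static void note_remote_cpu(void *info)
{
        /* Executes on the target CPU out of the call-function queue. */
        *(int *)info = smp_processor_id();
}

/* Caller-owned csd with ->func/->info prepared up front. */
static call_single_data_t note_csd = {
        .func = note_remote_cpu,
        .info = &remote_cpu_seen,
};

static int queue_note(int cpu)
{
        /* Returns without waiting; note_remote_cpu() runs later on 'cpu'.
         * Resubmitting note_csd before then trips the WARN_ON_ONCE above. */
        return smp_call_function_single_async(cpu, &note_csd);
}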