Revert "cpuidle: lpm-levels: Remove idle prediction feature"

This reverts commit c980eae303b3d4f6c7ecc89c3ac6440b8935c4b0.

Change-Id: I324f1ded32ad960978ce2684bbd50f04f7f16f0d
Signed-off-by: Richard Raya <rdxzv.dev@gmail.com>
Commit: 96c0eee27a
Parent: 89a65274b2
Author: Richard Raya
Date: 2024-12-07 00:02:01 -03:00
5 changed files with 731 additions and 9 deletions
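For context: the restored feature keeps the last MAXSAMPLES (5) idle residencies per CPU and per cluster, plus the intervals between reschedule IPIs, and runs a standard-deviation heuristic over those samples to predict the next sleep length. The prediction steers the level chosen in cpu_power_select() and cluster_select(), and per-CPU and per-cluster hrtimers bound the cost of a misprediction by invalidating stale history.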

[File 1 of 5]

@@ -52,6 +52,8 @@
#include <asm/mach/arch.h>
#include <asm/mpu.h>
#include <soc/qcom/lpm_levels.h>
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
@@ -721,6 +723,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
void smp_send_reschedule(int cpu)
{
update_ipi_history(cpu);
smp_cross_call_common(cpumask_of(cpu), IPI_RESCHEDULE);
}
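This hunk and the arm64 one below are the producers for the predictor: each reschedule IPI passes through update_ipi_history(), restored in lpm-levels.c further down, which records the interval since the previous IPI.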

[File 2 of 5]

@@ -62,6 +62,7 @@
#include <soc/qcom/minidump.h>
#include <soc/qcom/scm.h>
#include <soc/qcom/lpm_levels.h>
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
@@ -935,6 +936,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
void smp_send_reschedule(int cpu)
{
BUG_ON(cpu_is_offline(cpu));
update_ipi_history(cpu);
smp_cross_call_common(cpumask_of(cpu), IPI_RESCHEDULE);
}

[File 3 of 5]

@@ -361,6 +361,17 @@ static int parse_cluster_params(struct device_node *node,
if (ret)
goto fail;
key = "qcom,disable-prediction";
c->lpm_prediction = !(of_property_read_bool(node, key));
if (c->lpm_prediction) {
key = "qcom,clstr-tmr-add";
ret = of_property_read_u32(node, key, &c->tmr_add);
if (ret || c->tmr_add < TIMER_ADD_LOW ||
c->tmr_add > TIMER_ADD_HIGH)
c->tmr_add = DEFAULT_TIMER_ADD;
}
/* Set default_level to 0 as default */
c->default_level = 0;
@@ -578,6 +589,32 @@ static int parse_cpu_levels(struct device_node *node, struct lpm_cluster *c)
if (ret)
goto failed;
cpu->ipi_prediction = !(of_property_read_bool(node,
"qcom,disable-ipi-prediction"));
cpu->lpm_prediction = !(of_property_read_bool(node,
"qcom,disable-prediction"));
if (cpu->lpm_prediction) {
key = "qcom,ref-stddev";
ret = of_property_read_u32(node, key, &cpu->ref_stddev);
if (ret || cpu->ref_stddev < STDDEV_LOW ||
cpu->ref_stddev > STDDEV_HIGH)
cpu->ref_stddev = DEFAULT_STDDEV;
key = "qcom,tmr-add";
ret = of_property_read_u32(node, key, &cpu->tmr_add);
if (ret || cpu->tmr_add < TIMER_ADD_LOW ||
cpu->tmr_add > TIMER_ADD_HIGH)
cpu->tmr_add = DEFAULT_TIMER_ADD;
key = "qcom,ref-premature-cnt";
ret = of_property_read_u32(node, key, &cpu->ref_premature_cnt);
if (ret || cpu->ref_premature_cnt < PREMATURE_CNT_LOW ||
cpu->ref_premature_cnt > PREMATURE_CNT_HIGH)
cpu->ref_premature_cnt = DEFAULT_PREMATURE_CNT;
}
key = "parse_cpu";
ret = parse_cpu(node, cpu);
if (ret)

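The parsing above repeats one idiom: read an optional u32 property and fall back to a clamped default when it is absent or out of range. A minimal sketch of that idiom as a helper (read_tunable() is a hypothetical name, not part of this patch):

#include <linux/of.h>

/* Hypothetical helper: read a u32 DT property, falling back to def when
 * the property is missing or outside [lo, hi]. Mirrors the handling of
 * qcom,ref-stddev, qcom,tmr-add and qcom,ref-premature-cnt above. */
static u32 read_tunable(struct device_node *node, const char *key,
			u32 lo, u32 hi, u32 def)
{
	u32 val;

	if (of_property_read_u32(node, key, &val) || val < lo || val > hi)
		return def;
	return val;
}

With it, the qcom,tmr-add block above would reduce to: c->tmr_add = read_tunable(node, "qcom,tmr-add", TIMER_ADD_LOW, TIMER_ADD_HIGH, DEFAULT_TIMER_ADD);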
[File 4 of 5]

@@ -64,12 +64,38 @@ static struct system_pm_ops *sys_pm_ops;
struct lpm_cluster *lpm_root_node;
#define MAXSAMPLES 5
static bool lpm_prediction = true;
module_param_named(lpm_prediction, lpm_prediction, bool, 0664);
static uint32_t bias_hyst;
module_param_named(bias_hyst, bias_hyst, uint, 0664);
static bool lpm_ipi_prediction = true;
module_param_named(lpm_ipi_prediction, lpm_ipi_prediction, bool, 0664);
struct lpm_history {
uint32_t resi[MAXSAMPLES];
int mode[MAXSAMPLES];
int nsamp;
uint32_t hptr;
uint32_t hinvalid;
uint32_t htmr_wkup;
int64_t stime;
};
struct ipi_history {
uint32_t interval[MAXSAMPLES];
uint32_t current_ptr;
ktime_t cpu_idle_resched_ts;
};
static DEFINE_PER_CPU(struct lpm_history, hist);
static DEFINE_PER_CPU(struct ipi_history, cpu_ipi_history);
static DEFINE_PER_CPU(struct lpm_cpu*, cpu_lpm);
static bool suspend_in_progress;
static struct hrtimer lpm_hrtimer;
static DEFINE_PER_CPU(struct hrtimer, histtimer);
static DEFINE_PER_CPU(struct hrtimer, biastimer);
static void cluster_unprepare(struct lpm_cluster *cluster,
@@ -128,6 +154,97 @@ static enum hrtimer_restart lpm_hrtimer_cb(struct hrtimer *h)
return HRTIMER_NORESTART;
}
static void histtimer_cancel(void)
{
unsigned int cpu = raw_smp_processor_id();
struct hrtimer *cpu_histtimer = &per_cpu(histtimer, cpu);
ktime_t time_rem;
time_rem = hrtimer_get_remaining(cpu_histtimer);
if (ktime_to_us(time_rem) <= 0)
return;
hrtimer_try_to_cancel(cpu_histtimer);
}
static enum hrtimer_restart histtimer_fn(struct hrtimer *h)
{
int cpu = raw_smp_processor_id();
struct lpm_history *history = &per_cpu(hist, cpu);
history->hinvalid = 1;
return HRTIMER_NORESTART;
}
static void histtimer_start(uint32_t time_us)
{
uint64_t time_ns = time_us * NSEC_PER_USEC;
ktime_t hist_ktime = ns_to_ktime(time_ns);
unsigned int cpu = raw_smp_processor_id();
struct hrtimer *cpu_histtimer = &per_cpu(histtimer, cpu);
cpu_histtimer->function = histtimer_fn;
hrtimer_start(cpu_histtimer, hist_ktime, HRTIMER_MODE_REL_PINNED_HARD);
}
static void cluster_timer_init(struct lpm_cluster *cluster)
{
struct list_head *list;
if (!cluster)
return;
hrtimer_init(&cluster->histtimer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_HARD);
list_for_each(list, &cluster->child) {
struct lpm_cluster *n;
n = list_entry(list, typeof(*n), list);
cluster_timer_init(n);
}
}
static void clusttimer_cancel(void)
{
int cpu = raw_smp_processor_id();
struct lpm_cluster *cluster = per_cpu(cpu_lpm, cpu)->parent;
ktime_t time_rem;
time_rem = hrtimer_get_remaining(&cluster->histtimer);
if (ktime_to_us(time_rem) > 0)
hrtimer_try_to_cancel(&cluster->histtimer);
if (cluster->parent) {
time_rem = hrtimer_get_remaining(
&cluster->parent->histtimer);
if (ktime_to_us(time_rem) <= 0)
return;
hrtimer_try_to_cancel(&cluster->parent->histtimer);
}
}
static enum hrtimer_restart clusttimer_fn(struct hrtimer *h)
{
struct lpm_cluster *cluster = container_of(h,
struct lpm_cluster, histtimer);
cluster->history.hinvalid = 1;
return HRTIMER_NORESTART;
}
static void clusttimer_start(struct lpm_cluster *cluster, uint32_t time_us)
{
uint64_t time_ns = time_us * NSEC_PER_USEC;
ktime_t clust_ktime = ns_to_ktime(time_ns);
cluster->histtimer.function = clusttimer_fn;
hrtimer_start(&cluster->histtimer, clust_ktime,
HRTIMER_MODE_REL_PINNED_HARD);
}
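Note that the history, cluster and bias timers are all armed with HRTIMER_MODE_REL_PINNED_HARD: relative expiry, pinned to the CPU that armed them, and firing in hard interrupt context, so an invalidation callback cannot migrate away from the CPU (or cluster) whose history it guards.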
static void msm_pm_set_timer(uint32_t modified_time_us)
{
u64 modified_time_ns = modified_time_us * NSEC_PER_USEC;
@@ -165,6 +282,192 @@ static void biastimer_start(uint32_t time_ns)
hrtimer_start(cpu_biastimer, bias_ktime, HRTIMER_MODE_REL_PINNED_HARD);
}
static uint64_t find_deviation(int *interval, uint32_t ref_stddev,
int64_t *stime)
{
int divisor, i;
uint64_t max, avg, stddev;
int64_t thresh = LLONG_MAX;
do {
max = avg = divisor = stddev = 0;
for (i = 0; i < MAXSAMPLES; i++) {
int64_t value = interval[i];
if (value <= thresh) {
avg += value;
divisor++;
if (value > max)
max = value;
}
}
do_div(avg, divisor);
for (i = 0; i < MAXSAMPLES; i++) {
int64_t value = interval[i];
if (value <= thresh) {
int64_t diff = value - avg;
stddev += diff * diff;
}
}
do_div(stddev, divisor);
stddev = int_sqrt(stddev);
/*
* If the deviation is small enough, return the average;
* otherwise drop the maximum sample and retry
*/
if (((avg > stddev * 6) && (divisor >= (MAXSAMPLES - 1)))
|| stddev <= ref_stddev) {
*stime = ktime_to_us(ktime_get()) + avg;
return avg;
}
thresh = max - 1;
} while (divisor > (MAXSAMPLES - 1));
return 0;
}
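A worked example of the heuristic with ref_stddev = 100 and samples {100, 110, 90, 105, 2000} us: the first pass gives avg = 481 and stddev = 759, which fails both acceptance tests, so the maximum is masked via thresh = 1999; the second pass over the remaining four samples gives avg = 101 and stddev = 7, and since avg > stddev * 6 with divisor >= MAXSAMPLES - 1, the function returns 101 us. Below is a minimal standalone userspace re-implementation for experimentation (illustrative only: the kernel's do_div() and int_sqrt() are replaced by plain division and sqrt(), and predict_us() is an invented name):

/* Minimal userspace re-implementation of the find_deviation() heuristic
 * above, for experimentation only (build with: cc demo.c -lm). */
#include <stdio.h>
#include <stdint.h>
#include <math.h>

#define MAXSAMPLES 5

static uint64_t predict_us(const int *interval, uint32_t ref_stddev)
{
	int64_t thresh = INT64_MAX;
	int divisor;

	do {
		uint64_t max = 0, avg = 0, stddev = 0;
		int i;

		divisor = 0;
		for (i = 0; i < MAXSAMPLES; i++) {
			int64_t value = interval[i];

			if (value <= thresh) {
				avg += value;
				divisor++;
				if ((uint64_t)value > max)
					max = value;
			}
		}
		avg /= divisor;
		for (i = 0; i < MAXSAMPLES; i++) {
			int64_t value = interval[i];

			if (value <= thresh) {
				int64_t diff = value - avg;

				stddev += diff * diff;
			}
		}
		stddev = (uint64_t)sqrt((double)(stddev / divisor));
		/* Accept when tight enough, else mask the max and retry. */
		if ((avg > stddev * 6 && divisor >= MAXSAMPLES - 1) ||
		    stddev <= ref_stddev)
			return avg;
		thresh = max - 1;
	} while (divisor > MAXSAMPLES - 1);

	return 0;	/* too scattered to predict */
}

int main(void)
{
	int samples[MAXSAMPLES] = { 100, 110, 90, 105, 2000 };

	/* The 2000 us outlier is dropped; prints "predicted: 101 us". */
	printf("predicted: %llu us\n",
	       (unsigned long long)predict_us(samples, 100));
	return 0;
}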
static uint64_t lpm_cpuidle_predict(struct cpuidle_device *dev,
struct lpm_cpu *cpu, int *idx_restrict,
uint32_t *idx_restrict_time, uint32_t *ipi_predicted)
{
int i, j;
uint64_t avg;
struct lpm_history *history = &per_cpu(hist, dev->cpu);
struct ipi_history *ipi_history = &per_cpu(cpu_ipi_history, dev->cpu);
if (!lpm_prediction || !cpu->lpm_prediction)
return 0;
/*
* Samples are marked invalid when the wakeup was due to a timer,
* so do not predict.
*/
if (history->hinvalid) {
history->hinvalid = 0;
history->htmr_wkup = 1;
history->stime = 0;
return 1;
}
/*
* Predict only when all the samples are collected.
*/
if (history->nsamp < MAXSAMPLES) {
history->stime = 0;
return 0;
}
/*
* Check whether the samples deviate much; if not, use their
* average as the predicted sleep time. Otherwise, if any
* specific mode has too many premature exits, return the index
* of that mode.
*/
avg = find_deviation(history->resi, cpu->ref_stddev, &(history->stime));
if (avg)
return avg;
/*
* Count the premature exits for each mode, excluding the
* clock-gating mode; if more than fifty percent are premature,
* restrict that mode and deeper ones.
*/
if (history->htmr_wkup != 1) {
for (j = 1; j < cpu->nlevels; j++) {
struct lpm_cpu_level *level = &cpu->levels[j];
uint32_t min_residency = level->pwr.min_residency;
uint32_t max_residency = 0;
struct lpm_cpu_level *lvl;
uint32_t failed = 0;
uint64_t total = 0;
for (i = 0; i < MAXSAMPLES; i++) {
if ((history->mode[i] == j) &&
(history->resi[i] < min_residency)) {
failed++;
total += history->resi[i];
}
}
if (failed >= cpu->ref_premature_cnt) {
*idx_restrict = j;
do_div(total, failed);
for (i = 0; i < j; i++) {
lvl = &cpu->levels[i];
max_residency = lvl->pwr.max_residency;
if (total < max_residency) {
*idx_restrict = i + 1;
total = max_residency;
break;
}
}
*idx_restrict_time = total;
history->stime = ktime_to_us(ktime_get())
+ *idx_restrict_time;
break;
}
}
}
if (*idx_restrict_time || !cpu->ipi_prediction || !lpm_ipi_prediction)
return 0;
avg = find_deviation(ipi_history->interval, cpu->ref_stddev
+ DEFAULT_IPI_STDDEV,
&(history->stime));
if (avg) {
*ipi_predicted = 1;
return avg;
}
return 0;
}
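The return value of lpm_cpuidle_predict() is overloaded: 0 means no prediction (feature disabled, fewer than MAXSAMPLES samples, or no stable pattern), 1 means the history was just invalidated by a history-timer wakeup, and any other value is the predicted sleep time in microseconds, with *ipi_predicted set when it came from the IPI-interval path and *idx_restrict/*idx_restrict_time set when premature exits restrict deeper modes.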
static inline void invalidate_predict_history(struct cpuidle_device *dev)
{
struct lpm_history *history = &per_cpu(hist, dev->cpu);
struct lpm_cpu *lpm_cpu = per_cpu(cpu_lpm, dev->cpu);
if (!lpm_prediction || !lpm_cpu->lpm_prediction)
return;
if (history->hinvalid) {
history->hinvalid = 0;
history->htmr_wkup = 1;
history->stime = 0;
}
}
static void clear_predict_history(void)
{
struct lpm_history *history;
int i;
unsigned int cpu;
struct lpm_cpu *lpm_cpu = per_cpu(cpu_lpm, raw_smp_processor_id());
if (!lpm_prediction || !lpm_cpu->lpm_prediction)
return;
for_each_possible_cpu(cpu) {
history = &per_cpu(hist, cpu);
for (i = 0; i < MAXSAMPLES; i++) {
history->resi[i] = 0;
history->mode[i] = -1;
history->hptr = 0;
history->nsamp = 0;
history->stime = 0;
}
}
}
static void update_history(struct cpuidle_device *dev, int idx);
static inline bool is_cpu_biased(int cpu, uint64_t *bias_time)
{
u64 now = sched_clock();
@@ -195,8 +498,10 @@ static int cpu_power_select(struct cpuidle_device *dev,
uint32_t next_event_us = 0;
int i, idx_restrict;
uint32_t lvl_latency_us = 0;
uint64_t predicted = 0;
uint32_t htime = 0, idx_restrict_time = 0, ipi_predicted = 0;
uint32_t next_wakeup_us = (uint32_t)sleep_us;
-uint32_t max_residency;
+uint32_t min_residency, max_residency;
struct power_params *pwr_params;
uint64_t bias_time = 0;
@@ -223,6 +528,7 @@
pwr_params = &cpu->levels[i].pwr;
lvl_latency_us = pwr_params->exit_latency;
min_residency = pwr_params->min_residency;
max_residency = pwr_params->max_residency;
if (latency_us <= lvl_latency_us)
@@ -237,6 +543,23 @@
next_wakeup_us = next_event_us - lvl_latency_us;
}
if (!i && !cpu_isolated(dev->cpu)) {
/*
* If next_wakeup_us itself is not sufficient for low power
* modes deeper than clock gating, do not call prediction.
*/
if (next_wakeup_us > max_residency) {
predicted = (lpm_cpuidle_predict(dev, cpu,
&idx_restrict,
&idx_restrict_time, &ipi_predicted) == 1) ? 0 :
(max_residency >> 1);
if (predicted && (predicted < min_residency))
predicted = min_residency;
} else
invalidate_predict_history(dev);
}
if (i >= idx_restrict)
break;
@@ -247,16 +570,44 @@
else
modified_time_us = 0;
-if (next_wakeup_us <= max_residency)
+if (predicted ? (predicted <= max_residency)
+: (next_wakeup_us <= max_residency))
break;
}
if (modified_time_us)
msm_pm_set_timer(modified_time_us);
/*
* Start a timer to avoid staying in a shallower mode forever
* in case of misprediction
*/
pwr_params = &cpu->levels[best_level].pwr;
min_residency = pwr_params->min_residency;
max_residency = pwr_params->max_residency;
if ((predicted || (idx_restrict != (cpu->nlevels + 1)))
&& (best_level < (cpu->nlevels-1))) {
htime = predicted + cpu->tmr_add;
if (lpm_ipi_prediction && cpu->ipi_prediction)
htime += DEFAULT_IPI_TIMER_ADD;
if (!predicted)
htime = idx_restrict_time;
else if (htime > max_residency)
htime = max_residency;
if ((next_wakeup_us > htime) &&
((next_wakeup_us - htime) > max_residency))
histtimer_start(htime);
}
done_select:
trace_cpu_power_select(best_level, sleep_us, latency_us, next_event_us);
trace_cpu_pred_select(idx_restrict_time ? 2 : (ipi_predicted ?
3 : (predicted ? 1 : 0)), predicted, htime);
return best_level;
}
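Illustrative numbers for the misprediction timer above: with predicted = 200 us and tmr_add = 100 us, htime = 300 us (plus DEFAULT_IPI_TIMER_ADD = 900 us when IPI prediction is active), clamped to the chosen level's max_residency. The timer is then armed only when next_wakeup_us exceeds htime by more than max_residency, i.e. only when a misprediction would otherwise leave the CPU in the shallow state far past the deeper state's break-even residency.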
@@ -282,11 +633,13 @@ static unsigned int get_next_online_cpu(bool from_idle)
}
static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster,
-bool from_idle)
+bool from_idle, uint32_t *pred_time)
{
int cpu;
ktime_t next_event;
struct cpumask online_cpus_in_cluster;
struct lpm_history *history;
int64_t prediction = LONG_MAX;
if (!from_idle)
return ~0ULL;
@@ -301,6 +654,17 @@ static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster,
next_event_c = get_next_event_cpu(cpu);
if (*next_event_c < next_event)
next_event = *next_event_c;
if (from_idle && lpm_prediction && cluster->lpm_prediction) {
history = &per_cpu(hist, cpu);
if (history->stime && (history->stime < prediction))
prediction = history->stime;
}
}
if (from_idle && lpm_prediction && cluster->lpm_prediction) {
if (prediction > ktime_to_us(ktime_get()))
*pred_time = prediction - ktime_to_us(ktime_get());
}
if (ktime_to_us(next_event) > ktime_to_us(ktime_get()))
@@ -309,18 +673,192 @@ static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster,
return 0;
}
-static int cluster_select(struct lpm_cluster *cluster, bool from_idle)
static int cluster_predict(struct lpm_cluster *cluster,
uint32_t *pred_us)
{
int i, j;
int ret = 0;
struct cluster_history *history = &cluster->history;
int64_t cur_time = ktime_to_us(ktime_get());
if (!lpm_prediction || !cluster->lpm_prediction)
return 0;
if (history->hinvalid) {
history->hinvalid = 0;
history->htmr_wkup = 1;
history->flag = 0;
return ret;
}
if (history->nsamp == MAXSAMPLES) {
for (i = 0; i < MAXSAMPLES; i++) {
if ((cur_time - history->stime[i])
> CLUST_SMPL_INVLD_TIME)
history->nsamp--;
}
}
if (history->nsamp < MAXSAMPLES) {
history->flag = 0;
return ret;
}
if (history->flag == 2)
history->flag = 0;
if (history->htmr_wkup != 1) {
uint64_t total = 0;
if (history->flag == 1) {
for (i = 0; i < MAXSAMPLES; i++)
total += history->resi[i];
do_div(total, MAXSAMPLES);
*pred_us = total;
return 2;
}
for (j = 1; j < cluster->nlevels; j++) {
uint32_t failed = 0;
total = 0;
for (i = 0; i < MAXSAMPLES; i++) {
if ((history->mode[i] == j) && (history->resi[i]
< cluster->levels[j].pwr.min_residency)) {
failed++;
total += history->resi[i];
}
}
if (failed > (MAXSAMPLES-2)) {
do_div(total, failed);
*pred_us = total;
history->flag = 1;
return 1;
}
}
}
return ret;
}
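cluster_predict() returns one of three codes: 0 for no prediction; 1 when some level shows at least MAXSAMPLES - 1 premature exits, with *pred_us set to the average residency of those failed samples and history->flag set to 1; and 2 on a subsequent call while flag == 1, with *pred_us set to the plain average of all samples. Samples older than CLUST_SMPL_INVLD_TIME (40 ms) are aged out before any of these checks.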
static void update_cluster_history_time(struct cluster_history *history,
int idx, uint64_t start)
{
history->entry_idx = idx;
history->entry_time = start;
}
static void update_cluster_history(struct cluster_history *history, int idx)
{
uint32_t tmr = 0;
uint32_t residency = 0;
struct lpm_cluster *cluster =
container_of(history, struct lpm_cluster, history);
if (!lpm_prediction || !cluster->lpm_prediction)
return;
if ((history->entry_idx == -1) || (history->entry_idx == idx)) {
residency = ktime_to_us(ktime_get()) - history->entry_time;
history->stime[history->hptr] = history->entry_time;
} else
return;
if (history->htmr_wkup) {
if (!history->hptr)
history->hptr = MAXSAMPLES-1;
else
history->hptr--;
history->resi[history->hptr] += residency;
history->htmr_wkup = 0;
tmr = 1;
} else
history->resi[history->hptr] = residency;
history->mode[history->hptr] = idx;
history->entry_idx = INT_MIN;
history->entry_time = 0;
if (history->nsamp < MAXSAMPLES)
history->nsamp++;
trace_cluster_pred_hist(cluster->cluster_name,
history->mode[history->hptr], history->resi[history->hptr],
history->hptr, tmr);
(history->hptr)++;
if (history->hptr >= MAXSAMPLES)
history->hptr = 0;
}
static void clear_cl_history_each(struct cluster_history *history)
{
int i;
for (i = 0; i < MAXSAMPLES; i++) {
history->resi[i] = 0;
history->mode[i] = -1;
history->stime[i] = 0;
}
history->hptr = 0;
history->nsamp = 0;
history->flag = 0;
history->hinvalid = 0;
history->htmr_wkup = 0;
}
static void clear_cl_predict_history(void)
{
struct lpm_cluster *cluster = lpm_root_node;
struct list_head *list;
if (!lpm_prediction || !cluster->lpm_prediction)
return;
clear_cl_history_each(&cluster->history);
list_for_each(list, &cluster->child) {
struct lpm_cluster *n;
n = list_entry(list, typeof(*n), list);
clear_cl_history_each(&n->history);
}
}
static int cluster_select(struct lpm_cluster *cluster, bool from_idle,
int *ispred)
{
int best_level = -1;
int i;
struct cpumask mask;
uint32_t latency_us = ~0U;
uint32_t sleep_us;
uint32_t cpupred_us = 0, pred_us = 0;
int pred_mode = 0, predicted = 0;
if (!cluster)
return -EINVAL;
-sleep_us = (uint32_t)get_cluster_sleep_time(cluster, from_idle);
+sleep_us = (uint32_t)get_cluster_sleep_time(cluster,
+from_idle, &cpupred_us);
if (from_idle) {
pred_mode = cluster_predict(cluster, &pred_us);
if (cpupred_us && pred_mode && (cpupred_us < pred_us))
pred_us = cpupred_us;
if (pred_us && pred_mode && (pred_us < sleep_us))
predicted = 1;
if (predicted && (pred_us == cpupred_us))
predicted = 2;
}
if (cpumask_and(&mask, cpu_online_mask, &cluster->child_cpus))
latency_us = pm_qos_request_for_cpumask(PM_QOS_CPU_DMA_LATENCY,
@@ -356,10 +894,20 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle)
best_level = i;
-if (from_idle && sleep_us <= pwr_params->max_residency)
+if (from_idle &&
+(predicted ? (pred_us <= pwr_params->max_residency)
+: (sleep_us <= pwr_params->max_residency)))
break;
}
if ((best_level == (cluster->nlevels - 1)) && (pred_mode == 2))
cluster->history.flag = 2;
*ispred = predicted;
trace_cluster_pred_select(cluster->cluster_name, best_level, sleep_us,
latency_us, predicted, pred_us);
return best_level;
}
@@ -373,7 +921,7 @@ static void cluster_notify(struct lpm_cluster *cluster,
}
static int cluster_configure(struct lpm_cluster *cluster, int idx,
-bool from_idle)
+bool from_idle, int predicted)
{
struct lpm_cluster_level *level = &cluster->levels[idx];
struct cpumask online_cpus, cpumask;
@@ -391,6 +939,10 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx,
cluster->num_children_in_sync.bits[0],
cluster->child_cpus.bits[0], from_idle);
lpm_stats_cluster_enter(cluster->stats, idx);
if (from_idle && lpm_prediction && cluster->lpm_prediction)
update_cluster_history_time(&cluster->history, idx,
ktime_to_us(ktime_get()));
}
if (level->notify_rpm) {
@@ -405,6 +957,8 @@
cpu = get_next_online_cpu(from_idle);
cpumask_copy(&cpumask, cpumask_of(cpu));
clear_predict_history();
clear_cl_predict_history();
if (sys_pm_ops && sys_pm_ops->enter)
if ((sys_pm_ops->enter(&cpumask)))
return -EBUSY;
@@ -414,6 +968,13 @@
cluster->last_level = idx;
if (predicted && (idx < (cluster->nlevels - 1))) {
struct power_params *pwr_params = &cluster->levels[idx].pwr;
clusttimer_start(cluster, pwr_params->max_residency +
cluster->tmr_add);
}
return 0;
}
@@ -422,6 +983,7 @@ static void cluster_prepare(struct lpm_cluster *cluster,
int64_t start_time)
{
int i;
int predicted = 0;
if (!cluster)
return;
@@ -452,11 +1014,29 @@ static void cluster_prepare(struct lpm_cluster *cluster,
&cluster->child_cpus))
goto failed;
-i = cluster_select(cluster, from_idle);
+i = cluster_select(cluster, from_idle, &predicted);
if (((i < 0) || (i == cluster->default_level))
&& predicted && from_idle) {
update_cluster_history_time(&cluster->history,
-1, ktime_to_us(ktime_get()));
if (i < 0) {
struct power_params *pwr_params =
&cluster->levels[0].pwr;
clusttimer_start(cluster,
pwr_params->max_residency +
cluster->tmr_add);
goto failed;
}
}
if (i < 0)
goto failed;
-if (cluster_configure(cluster, i, from_idle))
+if (cluster_configure(cluster, i, from_idle, predicted))
goto failed;
if ((!IS_ERR_OR_NULL(cluster->stats)) && (IS_ENABLED(CONFIG_MSM_IDLE_STATS)))
@@ -501,6 +1081,10 @@ static void cluster_unprepare(struct lpm_cluster *cluster,
&lvl->num_cpu_votes, cpu);
}
if (from_idle && first_cpu &&
(cluster->last_level == cluster->default_level))
update_cluster_history(&cluster->history, cluster->last_level);
if (!first_cpu || cluster->last_level == cluster->default_level)
goto unlock_return;
@@ -525,6 +1109,9 @@
cluster_notify(cluster, &cluster->levels[last_level], false);
if (from_idle)
update_cluster_history(&cluster->history, last_level);
cluster_unprepare(cluster->parent, &cluster->child_cpus,
last_level, from_idle, end_time, success);
unlock_return:
@@ -643,6 +1230,54 @@ static int lpm_cpuidle_select(struct cpuidle_driver *drv,
return cpu_power_select(dev, cpu);
}
void update_ipi_history(int cpu)
{
struct ipi_history *history = &per_cpu(cpu_ipi_history, cpu);
ktime_t now = ktime_get();
history->interval[history->current_ptr] =
ktime_to_us(ktime_sub(now,
history->cpu_idle_resched_ts));
(history->current_ptr)++;
if (history->current_ptr >= MAXSAMPLES)
history->current_ptr = 0;
history->cpu_idle_resched_ts = now;
}
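This is the hook called from smp_send_reschedule() in the two smp.c hunks above: every reschedule IPI records the gap since the previous one, so interval[] always holds the last MAXSAMPLES inter-IPI gaps in microseconds, which lpm_cpuidle_predict() feeds to find_deviation() on the IPI path.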
static void update_history(struct cpuidle_device *dev, int idx)
{
struct lpm_history *history = &per_cpu(hist, dev->cpu);
uint32_t tmr = 0;
struct lpm_cpu *lpm_cpu = per_cpu(cpu_lpm, dev->cpu);
if (!lpm_prediction || !lpm_cpu->lpm_prediction)
return;
if (history->htmr_wkup) {
if (!history->hptr)
history->hptr = MAXSAMPLES-1;
else
history->hptr--;
history->resi[history->hptr] += dev->last_residency;
history->htmr_wkup = 0;
tmr = 1;
} else
history->resi[history->hptr] = dev->last_residency;
history->mode[history->hptr] = idx;
trace_cpu_pred_hist(history->mode[history->hptr],
history->resi[history->hptr], history->hptr, tmr);
if (history->nsamp < MAXSAMPLES)
history->nsamp++;
(history->hptr)++;
if (history->hptr >= MAXSAMPLES)
history->hptr = 0;
}
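Note the htmr_wkup handling shared with update_cluster_history(): when the previous idle period was cut short by the history timer itself, the new residency is accumulated into the previous ring slot instead of consuming a fresh one, since the timer artificially split a single sleep into two samples.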
static int lpm_cpuidle_enter(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int idx)
{
@@ -667,6 +1302,11 @@ exit:
cluster_unprepare(cpu->parent, cpumask, idx, true, end_time, success);
cpu_unprepare(cpu, idx, true);
update_history(dev, idx);
if (lpm_prediction && cpu->lpm_prediction) {
histtimer_cancel();
clusttimer_cancel();
}
if (cpu->bias) {
if (!idx)
biastimer_cancel();
@@ -973,11 +1613,16 @@ static int lpm_probe(struct platform_device *pdev)
s2idle_set_ops(&lpm_s2idle_ops);
hrtimer_init(&lpm_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
for_each_possible_cpu(cpu) {
cpu_histtimer = &per_cpu(histtimer, cpu);
hrtimer_init(cpu_histtimer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_HARD);
cpu_histtimer = &per_cpu(biastimer, cpu);
hrtimer_init(cpu_histtimer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_HARD);
}
cluster_timer_init(lpm_root_node);
register_cluster_lpm_stats(lpm_root_node, NULL);
ret = cluster_cpuidle_register(lpm_root_node);

[File 5 of 5]

@@ -13,6 +13,19 @@
#include <soc/qcom/pm.h>
#define NR_LPM_LEVELS 8
#define MAXSAMPLES 5
#define CLUST_SMPL_INVLD_TIME 40000
#define DEFAULT_PREMATURE_CNT 3
#define DEFAULT_STDDEV 100
#define DEFAULT_IPI_STDDEV 400
#define DEFAULT_TIMER_ADD 100
#define DEFAULT_IPI_TIMER_ADD 900
#define TIMER_ADD_LOW 100
#define TIMER_ADD_HIGH 1500
#define STDDEV_LOW 100
#define STDDEV_HIGH 1000
#define PREMATURE_CNT_LOW 1
#define PREMATURE_CNT_HIGH 5
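Apart from the PREMATURE_CNT_* bounds (a sample count), these tunables are in microseconds: the residency samples, the stddev references, and the tmr-add padding, while CLUST_SMPL_INVLD_TIME (40000 us) is the age beyond which a cluster history sample is discarded.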
struct power_params {
uint32_t entry_latency; /* Entry latency */
@@ -37,6 +50,11 @@ struct lpm_cpu {
int nlevels;
unsigned int psci_mode_shift;
unsigned int psci_mode_mask;
uint32_t ref_stddev;
uint32_t ref_premature_cnt;
uint32_t tmr_add;
bool lpm_prediction;
bool ipi_prediction;
uint64_t bias;
struct cpuidle_driver *drv;
struct lpm_cluster *parent;
@@ -68,6 +86,19 @@ struct lpm_cluster_level {
int reset_level;
};
struct cluster_history {
uint32_t resi[MAXSAMPLES];
int mode[MAXSAMPLES];
int64_t stime[MAXSAMPLES];
uint32_t hptr;
uint32_t hinvalid;
uint32_t htmr_wkup;
uint64_t entry_time;
int entry_idx;
int nsamp;
int flag;
};
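In cluster_history, resi[] and mode[] hold the residency and level of the last samples, stime[] the entry timestamps used for aging, hptr the ring-buffer head, nsamp the valid-sample count, hinvalid and htmr_wkup the timer-wakeup flags, entry_time and entry_idx the in-flight entry being measured, and flag the predictor state (set to 1 when a premature-exit bias is detected, cleared or advanced by cluster_predict() and cluster_select()).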
struct lpm_cluster {
struct list_head list;
struct list_head child;
@@ -78,6 +109,8 @@ struct lpm_cluster {
int min_child_level;
int default_level;
int last_level;
uint32_t tmr_add;
bool lpm_prediction;
struct list_head cpu;
spinlock_t sync_lock;
struct cpumask child_cpus;
@@ -86,6 +119,7 @@ struct lpm_cluster {
struct lpm_stats *stats;
unsigned int psci_mode_shift;
unsigned int psci_mode_mask;
struct cluster_history history;
struct hrtimer histtimer;
};
@@ -99,6 +133,7 @@ bool lpm_cpu_mode_allow(unsigned int cpu,
bool lpm_cluster_mode_allow(struct lpm_cluster *cluster,
unsigned int mode, bool from_idle);
uint32_t *get_per_cpu_max_residency(int cpu);
uint32_t *get_per_cpu_min_residency(int cpu);
extern struct lpm_cluster *lpm_root_node;
#if defined(CONFIG_SMP)