// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2021, The Linux Foundation. All rights reserved.
 * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
 */

#include <linux/types.h>
#include <soc/qcom/cmd-db.h>
#include <soc/qcom/tcs.h>

#include "adreno.h"
#include "adreno_gen8.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"

/*
 * A table of ARC levels for one rail, as filled in by rpmh_arc_cmds().
 */
struct rpmh_arc_vals {
	/* Number of entries in @val that were counted as levels */
	u32 num;
	/* Level table returned by cmd_db_read_aux_data(); not owned by us */
	const u16 *val;
};

/*
 * Description of one Bus Control Module (BCM) that gets a bandwidth vote.
 * @name and @buswidth (or @fixed) come from the static tables in this file;
 * @unit, @width and @vcd are filled in from cmd-db by build_rpmh_bw_votes().
 */
struct bcm {
	/* cmd-db resource name used to look up the address and aux data */
	const char *name;
	/* Divisor applied to the width-scaled bandwidth in tcs_cmd_data() */
	u32 buswidth;
	/* Not referenced by the code visible in this file */
	u32 channels;
	/* Divisor used to convert the scaled bandwidth into a vote value */
	u32 unit;
	/* Multiplier applied to the requested bandwidth */
	u16 width;
	/* Group id; the last BCM of each run of equal vcd gets the commit bit */
	u8 vcd;
	/* True if this BCM takes a fixed vote instead of a scaled one */
	bool fixed;
};

/*
 * Layout overlaid on the cmd-db aux data of a BCM by build_rpmh_bw_votes().
 * Multi-byte fields are little-endian and are converted on read.
 */
struct bcm_data {
	/* Copied to bcm.unit */
	__le32 unit;
	/* Copied to bcm.width */
	__le16 width;
	/* Copied to bcm.vcd */
	u8 vcd;
	/* Not read by this file */
	u8 reserved;
};

/*
 * Bandwidth vote table built by build_rpmh_bw_votes() and released with
 * free_rpmh_bw_votes().
 */
struct rpmh_bw_votes {
	/* Bit i is set when BCM i is the last one of its vcd group */
	u32 wait_bitmask;
	/* Number of BCMs, i.e. entries in @addrs and in each cmds[] row */
	u32 num_cmds;
	/* cmd-db address of each BCM (num_cmds entries) */
	u32 *addrs;
	/* Number of bandwidth levels, i.e. rows in @cmds */
	u32 num_levels;
	/* cmds[level][bcm] holds the TCS command data for that level and BCM */
	u32 **cmds;
};

/*
 * Pack an ARC vote into a single 32-bit word:
 *   bits 31:16 - vlvl
 *   bits 15:8  - sec (secondary rail index)
 *   bits 7:0   - pri (primary rail index)
 */
#define ARC_VOTE_SET(pri, sec, vlvl) \
	(FIELD_PREP(GENMASK(31, 16), vlvl) | \
	 FIELD_PREP(GENMASK(15, 8), sec) | \
	 FIELD_PREP(GENMASK(7, 0), pri))

static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
{
	size_t len = 0;

	arc->val = cmd_db_read_aux_data(res_id, &len);

	/*
	 * cmd_db_read_aux_data() gives us a zero-padded table of
	 * size len that contains the arc values. To determine the
	 * number of arc values, we loop through the table and count
	 * them until we get to the end of the buffer or hit the
	 * zero padding.
	 */
	for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
		if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0)
			break;
	}

	return 0;
}

/*
 * setup_volt_dependency_tbl - Build the ARC votes for a list of voltage levels
 * @votes: Output array, one packed ARC vote per entry
 * @pri_rail: Level table of the primary rail
 * @sec_rail: Level table of the secondary (dependent) rail
 * @vlvl: Requested voltage level of each entry
 * @num_entries: Number of entries in @vlvl and @votes
 *
 * Return: 0 on success, -EINVAL if a requested level is higher than every
 * level of the primary rail. Entries before the failing one are already
 * written to @votes in that case.
 */
static int setup_volt_dependency_tbl(u32 *votes,
		struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
		u16 *vlvl, u32 num_entries)
{
	u32 entry;

	for (entry = 0; entry < num_entries; entry++) {
		u32 pri, sec;
		u16 matched;

		/* First primary rail level that satisfies the request */
		for (pri = 0; pri < pri_rail->num; pri++) {
			if (pri_rail->val[pri] >= vlvl[entry])
				break;
		}

		/* Ran off the end of the table: nothing is high enough */
		if (pri == pri_rail->num)
			return -EINVAL;

		matched = pri_rail->val[pri];

		/*
		 * First secondary rail level that is at least the matched
		 * primary level; if none is high enough, settle for the
		 * last entry of the secondary table.
		 */
		sec = 0;
		while (sec < sec_rail->num &&
				sec_rail->val[sec] < matched &&
				sec + 1 != sec_rail->num)
			sec++;

		/* Only reachable with an empty secondary table */
		if (sec == sec_rail->num)
			sec = 0;

		votes[entry] = ARC_VOTE_SET(pri, sec, matched);
	}

	return 0;
}

/* Generate a set of bandwidth votes for the list of BCMs */
static void tcs_cmd_data(struct bcm *bcms, int count,
		u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode)
{
	int i;

	for (i = 0; i < count; i++) {
		bool valid = true;
		bool commit = false;
		u64 avg, peak, x, y;

		if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
			commit = true;

		if (bcms[i].fixed) {
			if (!ab && !ib)
				data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
			else
				data[i] = BCM_TCS_CMD(commit, true, 0x0,
							set_perfmode ? perfmode_vote : 0x0);
			continue;
		}

		/* Multiple the bandwidth by the width of the connection */
		avg = ((u64) ab) * bcms[i].width;

		/* And then divide by the total width */
		do_div(avg, bcms[i].buswidth);

		peak = ((u64) ib) * bcms[i].width;
		do_div(peak, bcms[i].buswidth);

		/* Input bandwidth value is in KBps */
		x = avg * 1000ULL;
		do_div(x, bcms[i].unit);

		/* Input bandwidth value is in KBps */
		y = peak * 1000ULL;
		do_div(y, bcms[i].unit);

		/*
		 * If a bandwidth value was specified but the calculation ends
		 * rounding down to zero, set a minimum level
		 */
		if (ab && x == 0)
			x = 1;

		if (ib && y == 0)
			y = 1;

		x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
		y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);

		if (!x && !y)
			valid = false;

		data[i] = BCM_TCS_CMD(commit, valid, x, y);
	}
}

static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
{
	int i;

	if (!votes)
		return;

	for (i = 0; votes->cmds && i < votes->num_levels; i++)
		kfree(votes->cmds[i]);

	kfree(votes->cmds);
	kfree(votes->addrs);
	kfree(votes);
}

/*
 * build_rpmh_bw_votes - Build the votes table from the specified bandwidth levels
 * @bcms: BCMs to vote on; unit, width and vcd of each entry are overwritten
 *        with the values read from cmd-db
 * @bcm_count: Number of entries in @bcms
 * @levels: Bandwidth of each level, used as both the ab and ib request
 * @levels_count: Number of entries in @levels
 * @perfmode_vote: Vote applied to fixed BCMs at or above @perfmode_lvl
 * @perfmode_lvl: First level index at which @perfmode_vote is applied
 *
 * Return: A newly allocated table that the caller must release with
 * free_rpmh_bw_votes(), or an ERR_PTR() on allocation or cmd-db failure.
 */
static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
		int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl)
{
	struct rpmh_bw_votes *votes;
	bool set_perfmode;
	int i;

	votes = kzalloc(sizeof(*votes), GFP_KERNEL);
	if (!votes)
		return ERR_PTR(-ENOMEM);

	/* Size by the element type of addrs, not of the cmds row pointers */
	votes->addrs = kcalloc(bcm_count, sizeof(*votes->addrs), GFP_KERNEL);
	if (!votes->addrs) {
		free_rpmh_bw_votes(votes);
		return ERR_PTR(-ENOMEM);
	}

	votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
	if (!votes->cmds) {
		free_rpmh_bw_votes(votes);
		return ERR_PTR(-ENOMEM);
	}

	votes->num_cmds = bcm_count;
	votes->num_levels = levels_count;

	/* Get the cmd-db information for each BCM */
	for (i = 0; i < bcm_count; i++) {
		size_t len = 0;
		const struct bcm_data *data;

		/* A failed lookup comes back as an error pointer */
		data = cmd_db_read_aux_data(bcms[i].name, &len);
		if (IS_ERR(data)) {
			free_rpmh_bw_votes(votes);
			return ERR_CAST(data);
		}

		/* Missing aux data cannot be interpreted as a bcm_data */
		if (!len) {
			free_rpmh_bw_votes(votes);
			return ERR_PTR(-EINVAL);
		}

		votes->addrs[i] = cmd_db_read_addr(bcms[i].name);

		bcms[i].unit = le32_to_cpu(data->unit);
		bcms[i].width = le16_to_cpu(data->width);
		bcms[i].vcd = data->vcd;
	}

	/* Flag the last BCM of every run that shares the same vcd */
	for (i = 0; i < bcm_count; i++) {
		if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
			votes->wait_bitmask |= (1 << i);
	}

	for (i = 0; i < levels_count; i++) {
		votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
		if (!votes->cmds[i]) {
			/* Rows not yet allocated are NULL, so this is safe */
			free_rpmh_bw_votes(votes);
			return ERR_PTR(-ENOMEM);
		}

		set_perfmode = i >= perfmode_lvl;
		tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i],
								perfmode_vote, set_perfmode);
	}

	return votes;
}

/*
 * setup_cx_arc_votes - Build the gmu voting table
 * @gmu: Pointer to gmu device
 * @pri_rail: Pointer to primary power rail vlvl table
 * @sec_rail: Pointer to second/dependent power rail vlvl table
 *
 * This function initializes the cx votes for all gmu frequencies
 * for gmu dcvs. Level 0 is the zero level; levels 1 and 2 are taken from
 * the first two entries of the gmu frequency and vlvl arrays.
 *
 * Return: 0 on success or the error from setup_volt_dependency_tbl(), in
 * which case the vote fields of the table are left untouched.
 */
static int setup_cx_arc_votes(struct gen8_gmu_device *gmu,
	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail)
{
	struct gen8_dcvs_table *table = &gmu->dcvs_table;
	u32 *gmu_freqs = gmu->freqs;
	u32 *gmu_vlvls = gmu->vlvls;
	/* Voltage level requested for each GMU level */
	u16 gmu_cx_vlvl[MAX_CX_LEVELS];
	u32 cx_votes[MAX_CX_LEVELS];
	int ret, lvl;

	table->gmu_level_num = 3;

	gmu_cx_vlvl[0] = 0;
	table->cx_votes[0].freq = 0;

	gmu_cx_vlvl[1] = gmu_vlvls[0];
	table->cx_votes[1].freq = gmu_freqs[0] / 1000;

	gmu_cx_vlvl[2] = gmu_vlvls[1];
	table->cx_votes[2].freq = gmu_freqs[1] / 1000;

	ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
			sec_rail, gmu_cx_vlvl, table->gmu_level_num);
	if (ret)
		return ret;

	for (lvl = 0; lvl < table->gmu_level_num; lvl++)
		table->cx_votes[lvl].vote = cx_votes[lvl];

	return 0;
}

/*
 * to_cx_hlvl - Translate a cx voltage level into an index of the cx rail table
 * @cx_rail: Level table of the cx rail
 * @vlvl: Voltage level to translate, or 0xffffffff for "no cx dependency"
 * @hlvl: Output; index of the first table entry that is >= @vlvl, or
 *        0xffffffff passed through unchanged
 *
 * Return: 0 on success, -EINVAL if no table entry is high enough (@hlvl is
 * not written in that case).
 */
static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
{
	u32 idx;

	if (vlvl != 0xffffffff) {
		for (idx = 0; idx < cx_rail->num; idx++) {
			if (cx_rail->val[idx] < vlvl)
				continue;

			*hlvl = idx;
			return 0;
		}

		return -EINVAL;
	}

	/*
	 * This means that the Gx level doesn't have a dependency on Cx level.
	 * Return the same value to disable cx voting at GMU.
	 */
	*hlvl = vlvl;

	return 0;
}

/*
 * setup_gx_arc_votes - Build the gpu dcvs voting table
 * @adreno_dev: Pointer to the adreno device
 * @pri_rail: Pointer to primary power rail vlvl table
 * @sec_rail: Pointer to second/dependent power rail vlvl table
 * @cx_rail: Pointer to the cx rail vlvl table, used for the per-level cx vote
 *
 * This function initializes the gx votes for all gpu frequencies
 * for gpu dcvs. Entry 0 of the table is the zero power level; the
 * remaining entries are the KGSL power levels in reverse order.
 *
 * Return: 0 on success, -ERANGE if there are more power levels than the
 * table can hold, or the error from to_cx_hlvl() or
 * setup_volt_dependency_tbl().
 */
static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
	struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
	struct rpmh_arc_vals *cx_rail)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct gen8_dcvs_table *table = &gmu->dcvs_table;
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
	u16 vlvl_tbl[MAX_GX_LEVELS];
	u32 gx_votes[MAX_GX_LEVELS];
	u32 dst;
	int ret, src;

	if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) {
		dev_err(device->dev,
			"Defined more GPU DCVS levels than RPMh can support\n");
		return -ERANGE;
	}

	/* Add the zero powerlevel for the perf table */
	table->gpu_level_num = pwr->num_pwrlevels + 1;

	memset(vlvl_tbl, 0, sizeof(vlvl_tbl));

	table->gx_votes[0].freq = 0;

	/* Disable cx vote in gmu dcvs table if it is not supported in DT */
	if (pwr->pwrlevels[0].cx_level == 0xffffffff)
		table->gx_votes[0].cx_vote = 0xffffffff;
	else
		table->gx_votes[0].cx_vote = 0;

	/*
	 * GMU power levels are in ascending order, so walk the KGSL power
	 * levels from the last one to the first while filling the table
	 * upwards from entry 1.
	 */
	dst = 1;
	for (src = pwr->num_pwrlevels - 1; src >= 0; src--) {
		u32 cx_vlvl = pwr->pwrlevels[src].cx_level;

		vlvl_tbl[dst] = pwr->pwrlevels[src].voltage_level;
		table->gx_votes[dst].freq = pwr->pwrlevels[src].gpu_freq / 1000;

		ret = to_cx_hlvl(cx_rail, cx_vlvl,
				&table->gx_votes[dst].cx_vote);
		if (ret) {
			dev_err(device->dev, "Unsupported cx corner: %u\n",
					cx_vlvl);
			return ret;
		}

		dst++;
	}

	ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
			sec_rail, vlvl_tbl, table->gpu_level_num);
	if (ret)
		return ret;

	for (src = 0; src < table->gpu_level_num; src++)
		table->gx_votes[src].vote = gx_votes[src];

	return 0;
}

/*
 * build_dcvs_table - Build the GMU (cx) and GPU (gx) dcvs vote tables
 * @adreno_dev: Pointer to the adreno device
 *
 * Reads the "gfx.lvl", "cx.lvl" and "mx.lvl" level tables and uses them to
 * fill the dcvs table of the gmu device. The mx table is the secondary
 * rail for both the cx and the gx votes.
 *
 * Return: 0 on success or the first error encountered.
 */
static int build_dcvs_table(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
	int ret;

	ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
	if (!ret)
		ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
	if (!ret)
		ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
	if (!ret)
		ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc);
	if (!ret)
		ret = setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);

	return ret;
}

/*
 * List of Bus Control Modules (BCMs) that need to be configured for the GPU
 * to access DDR. For each bus level we will generate a vote for each BCM.
 * The remaining fields of each entry are filled in from cmd-db by
 * build_rpmh_bw_votes(), which is why the table is not const.
 */
static struct bcm gen8_ddr_bcms[] = {
	{ .name = "SH0", .buswidth = 16 },
	{ .name = "MC0", .buswidth = 4 },
	/* Fixed vote: not scaled by bandwidth, may carry the perfmode vote */
	{ .name = "ACV", .fixed = true },
};

/*
 * List of BCMs that get a vote for each CNOC bus level. As with the DDR
 * list, the remaining fields are filled in by build_rpmh_bw_votes().
 */
static struct bcm gen8_cnoc_bcms[] = {
	{ .name = "CN0", .buswidth = 4 },
};

/*
 * build_bw_table_cmd - Copy the vote tables into the HFI bandwidth table command
 * @cmd: HFI command to fill in
 * @ddr: DDR vote table; also determines the number of bandwidth levels
 * @cnoc: CNOC vote table, or NULL to leave the CNOC fields untouched
 */
static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
		struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
{
	u32 level, bcm;

	cmd->bw_level_num = ddr->num_levels;
	cmd->ddr_cmds_num = ddr->num_cmds;
	cmd->ddr_wait_bitmask = ddr->wait_bitmask;

	for (bcm = 0; bcm < ddr->num_cmds; bcm++)
		cmd->ddr_cmd_addrs[bcm] = ddr->addrs[bcm];

	for (level = 0; level < ddr->num_levels; level++) {
		for (bcm = 0; bcm < ddr->num_cmds; bcm++)
			cmd->ddr_cmd_data[level][bcm] = ddr->cmds[level][bcm];
	}

	/* The CNOC table is optional */
	if (cnoc) {
		cmd->cnoc_cmds_num = cnoc->num_cmds;
		cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;

		for (bcm = 0; bcm < cnoc->num_cmds; bcm++)
			cmd->cnoc_cmd_addrs[bcm] = cnoc->addrs[bcm];

		for (level = 0; level < cnoc->num_levels; level++) {
			for (bcm = 0; bcm < cnoc->num_cmds; bcm++)
				cmd->cnoc_cmd_data[level][bcm] =
					cnoc->cmds[level][bcm];
		}
	}
}

/*
 * BIT(2) is used to vote for GPU performance mode through GMU. It is passed
 * as the perfmode vote when the DDR vote table is built and ends up in the
 * vote of the fixed BCM for levels at or above the perfmode level.
 */
#define ACV_GPU_PERFMODE_VOTE	BIT(2)

/*
 * build_bw_table - Build the HFI bandwidth vote table
 * @adreno_dev: Pointer to the adreno device
 *
 * Builds the DDR votes from the power control DDR table and, if the
 * "qcom,bus-table-cnoc" table is present, the CNOC votes, then copies
 * both into the bw_table HFI command of the gmu device. The intermediate
 * vote tables are always released before returning.
 *
 * Return: 0 on success or a negative error code.
 */
static int build_bw_table(struct adreno_device *adreno_dev)
{
	struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
	const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
	struct rpmh_bw_votes *ddr, *cnoc = NULL;
	u32 perfmode_lvl = kgsl_pwrctrl_get_acv_perfmode_lvl(device,
			gen8_core->acv_perfmode_ddr_freq);
	u32 *cnoc_table;
	/* Stay at zero if the lookup below fails without writing the count */
	u32 count = 0;
	int ret;

	ddr = build_rpmh_bw_votes(gen8_ddr_bcms, ARRAY_SIZE(gen8_ddr_bcms),
		pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl);
	if (IS_ERR(ddr))
		return PTR_ERR(ddr);

	cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
		&count);

	/*
	 * The CNOC table is optional. kgsl_bus_get_table() is defined
	 * elsewhere; treat both a NULL and an error-pointer return as
	 * "no table" so that neither is dereferenced or handed to kfree().
	 */
	if (!IS_ERR_OR_NULL(cnoc_table)) {
		if (count > 0)
			cnoc = build_rpmh_bw_votes(gen8_cnoc_bcms,
				ARRAY_SIZE(gen8_cnoc_bcms), cnoc_table, count, 0, 0);

		kfree(cnoc_table);
	}

	if (IS_ERR(cnoc)) {
		free_rpmh_bw_votes(ddr);
		return PTR_ERR(cnoc);
	}

	ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
	if (!ret)
		build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);

	/* Release the vote tables on the failure path as well */
	free_rpmh_bw_votes(ddr);
	free_rpmh_bw_votes(cnoc);

	return ret;
}

/*
 * gen8_build_rpmh_tables - Build the dcvs and bandwidth vote tables
 * @adreno_dev: Pointer to the adreno device
 *
 * The bandwidth table is only built if the dcvs table was built
 * successfully. A message is logged for whichever step fails.
 *
 * Return: 0 on success or the error of the failing step.
 */
int gen8_build_rpmh_tables(struct adreno_device *adreno_dev)
{
	int ret = build_dcvs_table(adreno_dev);

	if (!ret) {
		ret = build_bw_table(adreno_dev);
		if (ret)
			dev_err(adreno_dev->dev.dev, "Failed to build bw table\n");
	} else {
		dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n");
	}

	return ret;
}