Add 'qcom/opensource/graphics-kernel/' from commit 'b4fdc4c04295ac59109ae19d64747522740c3f14'

git-subtree-dir: qcom/opensource/graphics-kernel
git-subtree-mainline: 992813d9c1
git-subtree-split: b4fdc4c042
Change-Id:
repo: https://git.codelinaro.org/clo/la/platform/vendor/qcom/opensource/graphics-kernel
tag: GRAPHICS.LA.14.0.r1-07700-lanai.0
David Wronek 2024-10-06 16:44:56 +02:00
commit 880d405719
195 changed files with 145115 additions and 0 deletions


@@ -0,0 +1,35 @@
headers_src = [
"include/uapi/linux/*.h",
]
gfx_headers_out = [
"linux/msm_kgsl.h",
]
gfx_kernel_headers_verbose = "--verbose "
genrule {
name: "qti_generate_gfx_kernel_headers",
tools: ["headers_install.sh",
"unifdef"
],
tool_files: [
"gfx_kernel_headers.py",
],
srcs: headers_src,
cmd: "python3 -u $(location gfx_kernel_headers.py) " +
gfx_kernel_headers_verbose +
"--header_arch arm64 " +
"--gen_dir $(genDir) " +
"--gfx_include_uapi $(locations include/uapi/linux/*.h) " +
"--unifdef $(location unifdef) " +
"--headers_install $(location headers_install.sh)",
out: gfx_headers_out,
}
cc_library_headers {
name: "qti_gfx_kernel_uapi",
generated_headers: ["qti_generate_gfx_kernel_headers"],
export_generated_headers: ["qti_generate_gfx_kernel_headers"],
vendor: true,
recovery_available: true
}
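
The genrule and header library above exist so that vendor userspace can compile against the sanitized UAPI header (linux/msm_kgsl.h) generated from include/uapi/linux. A minimal consumer sketch, assuming the KGSL 3D device node is exposed as /dev/kgsl-3d0; only the generated header name comes from the module definitions above, everything else is illustrative:

/* example_kgsl_open.c - illustrative only */
#include <fcntl.h>
#include <unistd.h>
#include <linux/msm_kgsl.h>	/* exported by qti_gfx_kernel_uapi */

int main(void)
{
	/* Assumption: KGSL exposes its 3D device as /dev/kgsl-3d0. */
	int fd = open("/dev/kgsl-3d0", O_RDWR);

	if (fd < 0)
		return 1;

	/* The ioctl numbers and request structs a real client would use
	 * come from the generated linux/msm_kgsl.h; none are issued here. */
	close(fd);
	return 0;
}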


@@ -0,0 +1,57 @@
ifeq ($(TARGET_USES_QMAA),true)
KGSL_ENABLED := false
ifeq ($(TARGET_USES_QMAA_OVERRIDE_GFX),true)
KGSL_ENABLED := true
endif # TARGET_USES_QMAA_OVERRIDE_GFX
else
KGSL_ENABLED := true
endif # TARGET_USES_QMAA
ifeq ($(ENABLE_HYP), true)
KGSL_ENABLED := false
endif
LOCAL_MODULE_DDK_BUILD := true
LOCAL_MODULE_DDK_ALLOW_UNSAFE_HEADERS := true
ifeq ($(KGSL_ENABLED),true)
KGSL_SELECT := CONFIG_QCOM_KGSL=m
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
# This makefile is only for DLKM
ifneq ($(findstring vendor,$(LOCAL_PATH)),)
ifeq ($(BOARD_COMMON_DIR),)
BOARD_COMMON_DIR := device/qcom/common
endif
DLKM_DIR := $(BOARD_COMMON_DIR)/dlkm
KBUILD_OPTIONS += BOARD_PLATFORM=$(TARGET_BOARD_PLATFORM)
KBUILD_OPTIONS += $(KGSL_SELECT)
KBUILD_OPTIONS += MODNAME=msm_kgsl
ifeq ($(TARGET_BOARD_PLATFORM), pineapple)
KBUILD_OPTIONS += KBUILD_EXTRA_SYMBOLS+=$(PWD)/$(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers
endif
include $(CLEAR_VARS)
# For incremental compilation
LOCAL_SRC_FILES := $(wildcard $(LOCAL_PATH)/**/*) $(wildcard $(LOCAL_PATH)/*)
LOCAL_MODULE := msm_kgsl.ko
LOCAL_MODULE_KBUILD_NAME := msm_kgsl.ko
LOCAL_MODULE_TAGS := optional
LOCAL_MODULE_DEBUG_ENABLE := true
LOCAL_MODULE_PATH := $(KERNEL_MODULES_OUT)
ifeq ($(TARGET_BOARD_PLATFORM), pineapple)
LOCAL_REQUIRED_MODULES := hw-fence-module-symvers
LOCAL_ADDITIONAL_DEPENDENCIES := $(call intermediates-dir-for,DLKM,hw-fence-module-symvers)/Module.symvers
endif
# Include msm_kgsl.ko in the /vendor/lib/modules (vendor.img)
BOARD_VENDOR_KERNEL_MODULES += $(LOCAL_MODULE_PATH)/$(LOCAL_MODULE)
include $(DLKM_DIR)/Build_external_kernelmodule.mk
endif # DLKM check
endif # KGSL_ENABLED


@@ -0,0 +1,6 @@
load(":build/kgsl_defs.bzl", "define_target_module")
define_target_module("pineapple")
define_target_module("sun")
define_target_module("blair")
define_target_module("monaco")


@@ -0,0 +1,159 @@
# SPDX-License-Identifier: GPL-2.0-only
KDIR := $(TOP)/kernel_platform/common
ifeq ($(KGSL_PATH),)
KGSL_PATH=$(src)
endif
# If we're not GVM and not in an Android tree, select KGSL config
ifeq ($(CONFIG_QTI_QUIN_GVM),)
ifeq ($(ANDROID_BUILD_TOP),)
CONFIG_QCOM_KGSL = m
endif
endif
ifeq ($(CONFIG_ARCH_WAIPIO), y)
include $(KGSL_PATH)/config/gki_waipiodisp.conf
endif
ifeq ($(CONFIG_ARCH_KALAMA), y)
include $(KGSL_PATH)/config/gki_kalama.conf
endif
ifeq ($(CONFIG_ARCH_PINEAPPLE), y)
include $(KGSL_PATH)/config/gki_pineapple.conf
endif
ifeq ($(CONFIG_ARCH_BLAIR), y)
include $(KGSL_PATH)/config/gki_blair.conf
endif
ifeq ($(CONFIG_ARCH_PITTI), y)
include $(KGSL_PATH)/config/gki_pitti.conf
endif
ifeq ($(CONFIG_ARCH_SA8155), y)
include $(KGSL_PATH)/config/gki_sa8155.conf
endif
ifeq ($(CONFIG_ARCH_KHAJE), y)
include $(KGSL_PATH)/config/gki_khajedisp.conf
endif
ifeq ($(CONFIG_ARCH_SA8195), y)
include $(KGSL_PATH)/config/gki_sa8155.conf
endif
ifeq ($(CONFIG_ARCH_SA6155), y)
include $(KGSL_PATH)/config/gki_sa8155.conf
endif
ifeq ($(CONFIG_ARCH_MONACO), y)
include $(KGSL_PATH)/config/gki_monaco.conf
endif
ifeq ($(CONFIG_ARCH_LEMANS), y)
include $(KGSL_PATH)/config/gki_lemans.conf
endif
ifeq ($(CONFIG_ARCH_KONA), y)
include $(KGSL_PATH)/config/gki_kona.conf
endif
ifeq ($(CONFIG_ARCH_TRINKET), y)
include $(KGSL_PATH)/config/gki_trinket.conf
endif
ifeq ($(CONFIG_ARCH_QCS405), y)
include $(KGSL_PATH)/config/gki_qcs405.conf
endif
ifeq ($(CONFIG_ARCH_HOLI), y)
include $(KGSL_PATH)/config/gki_blair.conf
endif
ccflags-y += -I$(KGSL_PATH) -I$(KGSL_PATH)/include/linux -I$(KGSL_PATH)/include -I$(KERNEL_SRC)/drivers/devfreq
obj-$(CONFIG_QCOM_KGSL) += msm_kgsl.o
msm_kgsl-y = \
kgsl.o \
kgsl_bus.o \
kgsl_drawobj.o \
kgsl_events.o \
kgsl_eventlog.o \
kgsl_gmu_core.o \
kgsl_ioctl.o \
kgsl_mmu.o \
kgsl_pwrctrl.o \
kgsl_pwrscale.o \
kgsl_regmap.o \
kgsl_sharedmem.o \
kgsl_snapshot.o \
kgsl_timeline.o \
kgsl_trace.o \
kgsl_util.o \
kgsl_vbo.o
msm_kgsl-$(CONFIG_COMPAT) += kgsl_compat.o
msm_kgsl-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
msm_kgsl-$(CONFIG_ARM_SMMU) += kgsl_iommu.o
msm_kgsl-$(CONFIG_SYNC_FILE) += kgsl_sync.o
msm_kgsl-$(CONFIG_QCOM_KGSL_PROCESS_RECLAIM) += kgsl_reclaim.o
ifndef CONFIG_QCOM_KGSL_USE_SHMEM
msm_kgsl-y += kgsl_pool.o
endif
msm_kgsl-y += \
adreno.o \
adreno_a3xx.o \
adreno_a3xx_perfcounter.o \
adreno_a3xx_ringbuffer.o \
adreno_a3xx_snapshot.o \
adreno_a5xx.o \
adreno_a5xx_perfcounter.o \
adreno_a5xx_preempt.o \
adreno_a5xx_ringbuffer.o \
adreno_a5xx_snapshot.o \
adreno_a6xx.o \
adreno_a6xx_gmu.o \
adreno_a6xx_gmu_snapshot.o \
adreno_a6xx_hfi.o \
adreno_a6xx_hwsched.o \
adreno_a6xx_hwsched_hfi.o \
adreno_a6xx_perfcounter.o \
adreno_a6xx_preempt.o \
adreno_a6xx_rgmu.o \
adreno_a6xx_ringbuffer.o \
adreno_a6xx_rpmh.o \
adreno_a6xx_snapshot.o \
adreno_cp_parser.o \
adreno_dispatch.o \
adreno_drawctxt.o \
adreno_gen7.o \
adreno_gen7_gmu.o \
adreno_gen7_gmu_snapshot.o \
adreno_gen7_hfi.o \
adreno_gen7_hwsched.o \
adreno_gen7_hwsched_hfi.o \
adreno_gen7_perfcounter.o \
adreno_gen7_preempt.o \
adreno_gen7_ringbuffer.o \
adreno_gen7_rpmh.o \
adreno_gen7_snapshot.o \
adreno_gen8.o \
adreno_gen8_gmu.o \
adreno_gen8_gmu_snapshot.o \
adreno_gen8_hfi.o \
adreno_gen8_hwsched.o \
adreno_gen8_hwsched_hfi.o \
adreno_gen8_perfcounter.o \
adreno_gen8_preempt.o \
adreno_gen8_ringbuffer.o \
adreno_gen8_rpmh.o \
adreno_gen8_snapshot.o \
adreno_hwsched.o \
adreno_ioctl.o \
adreno_perfcounter.o \
adreno_ringbuffer.o \
adreno_snapshot.o \
adreno_sysfs.o \
adreno_trace.o \
governor_msm_adreno_tz.o \
governor_gpubw_mon.o
msm_kgsl-$(CONFIG_COMPAT) += adreno_compat.o
msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_coresight.o
msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a3xx_coresight.o
msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a5xx_coresight.o
msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_a6xx_coresight.o
msm_kgsl-$(CONFIG_QCOM_KGSL_CORESIGHT) += adreno_gen7_coresight.o
msm_kgsl-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o


@@ -0,0 +1,120 @@
# SPDX-License-Identifier: GPL-2.0-only
config QCOM_KGSL
tristate "Qualcomm Technologies, Inc. 3D Graphics driver"
depends on ARCH_QCOM
depends on NVMEM_QCOM_QFPROM || QCOM_QFPROM
select QCOM_MDT_LOADER
select INTERVAL_TREE
select TRACE_GPU_MEM
help
3D graphics driver for the Adreno family of GPUs from QTI.
Required to use hardware accelerated OpenGL, compute and Vulkan
on QTI targets. This includes power management, memory management,
and scheduling for the Adreno GPUs.
config DEVFREQ_GOV_QCOM_ADRENO_TZ
tristate "Qualcomm Technologies, Inc. GPU frequency governor"
depends on PM_DEVFREQ
help
GPU frequency governor for the Adreno GPU. Sets the frequency
using an "on demand" algorithm in conjunction with other
components on Adreno platforms. This is not useful for non-Adreno
devices.
config DEVFREQ_GOV_QCOM_GPUBW_MON
tristate "Qualcomm Technologies, Inc. GPU bandwidth governor"
depends on DEVFREQ_GOV_QCOM_ADRENO_TZ
help
This governor works together with the Adreno GPU governor to
select bus frequency votes using an "on-demand" algorithm.
This governor will not be useful for non-Adreno based
targets.
config QCOM_KGSL_FENCE_TRACE
bool "Enable built-in tracing for adreno fence timeouts"
help
A boolean flag used to create a KGSL-specific tracing instance
under <tracefs>/tracing/instances/kgsl-fence that can be used
for debugging timeouts for fences between KGSL-contexts and
sync-point blocks. If unsure, say 'N' here.
config QCOM_ADRENO_DEFAULT_GOVERNOR
string "devfreq governor for the adreno core"
default "msm-adreno-tz"
config QCOM_KGSL_CORESIGHT
bool "Enable coresight support for the Adreno GPU"
depends on CORESIGHT
default y
help
When enabled, the Adreno GPU is available as a source for Coresight
data. On a6xx targets there are two sources available for the GX and
CX domains respectively. Debug kernels should say 'Y' here.
config QCOM_KGSL_IOCOHERENCY_DEFAULT
bool "Enable I/O coherency on cached GPU memory by default"
default y if ARCH_LAHAINA
help
Say 'Y' here to enable I/O cache coherency by default on targets that
support hardware I/O coherency. If enabled all cached GPU memory
will use I/O coherency regardless of the user flags. If not enabled
the user can still selectively enable I/O coherency with a flag.
config QCOM_KGSL_IDLE_TIMEOUT
int
default 80
help
GPU idle timeout for the Adreno GPU. This value decides how long
the GPU stays idle before it goes into slumber. A higher value
keeps the GPU powered on for a longer duration, which has a power
cost.
config QCOM_KGSL_CONTEXT_DEBUG
bool "Log kgsl context information for all processes"
help
When enabled, the total number of KGSL contexts and the numbers of
attached and detached contexts are dumped into the kernel log for all
processes. This gives insight into the number of contexts held by each
process.
config QCOM_KGSL_SORT_POOL
bool "Sort pool page list based on physical address"
default y
help
When enabled, the pool page list is sorted based on physical
addresses. This can be turned on for targets where better DDR
efficiency is attained when accessing adjacent memory.
config QCOM_KGSL_QDSS_STM
bool "Enable support for QDSS STM for Adreno GPU"
depends on CORESIGHT
help
When enabled, QDSS STM support for the Adreno GPU is turned on. The GPU
QDSS STM memory is mapped to the GPU, and the QDSS clock needed to access
this memory is voted for. Debug kernels should say 'Y' here.
config QCOM_KGSL_USE_SHMEM
bool "Enable using shmem for memory allocations"
depends on SHMEM
help
Say 'Y' to enable using shmem for memory allocations. If enabled,
there is no support for memory pools or higher-order pages, but
using shmem helps make kgsl pages available for reclaiming.
config QCOM_KGSL_PROCESS_RECLAIM
bool "Make driver pages available for reclaim"
select QCOM_KGSL_USE_SHMEM
help
Say 'Y' to make driver pages available for reclaiming. If enabled,
shmem will be used for allocation. kgsl learns each process's
foreground/background state through a per-process sysfs entry. Based
on this, kgsl can unpin a given number of pages from background
processes and make them available to the shrinker.
config QCOM_KGSL_HIBERNATION
bool "Enable Hibernation support in KGSL"
depends on HIBERNATION
help
Say 'Y' to enable hibernation support in kgsl. If enabled, kgsl
will register necessary power manager callbacks to support
hibernation.
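
The boolean options above gate driver behaviour at compile time. A minimal sketch of how such an option is typically consumed on the C side, assuming the standard CONFIG_ prefix and the kernel's IS_ENABLED() helper; kgsl_context_debug_example() is a hypothetical function used only for illustration:

#include <linux/kconfig.h>

static void kgsl_context_debug_example(void)
{
	/* Compiled out unless CONFIG_QCOM_KGSL_CONTEXT_DEBUG is set */
	if (!IS_ENABLED(CONFIG_QCOM_KGSL_CONTEXT_DEBUG))
		return;

	/* ...dump per-process context counts to the kernel log... */
}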


@@ -0,0 +1,17 @@
ifeq ($(KGSL_MODULE_ROOT),)
CUR_MKFILE = $(abspath $(lastword $(MAKEFILE_LIST)))
KGSL_MODULE_ROOT = $(dir $(CUR_MKFILE))
endif
KBUILD_OPTIONS+=KGSL_PATH=$(KGSL_MODULE_ROOT)
all: modules
modules_install:
$(MAKE) INSTALL_MOD_STRIP=1 -C $(KERNEL_SRC) M=$(M) modules_install
clean:
rm -f *.cmd *.d *.mod *.o *.ko *.mod.c *.mod.o Module.symvers modules.order
%:
$(MAKE) -C $(KERNEL_SRC) M=$(M) $@ $(KBUILD_OPTIONS)


@@ -0,0 +1,564 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved.
*/
#ifndef _A300_REG_H
#define _A300_REG_H
/* Interrupt bit positions within RBBM_INT_0 */
#define A3XX_INT_RBBM_GPU_IDLE 0
#define A3XX_INT_RBBM_AHB_ERROR 1
#define A3XX_INT_RBBM_REG_TIMEOUT 2
#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3
#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4
#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5
#define A3XX_INT_VFD_ERROR 6
#define A3XX_INT_CP_SW_INT 7
#define A3XX_INT_CP_T0_PACKET_IN_IB 8
#define A3XX_INT_CP_OPCODE_ERROR 9
#define A3XX_INT_CP_RESERVED_BIT_ERROR 10
#define A3XX_INT_CP_HW_FAULT 11
#define A3XX_INT_CP_DMA 12
#define A3XX_INT_CP_IB2_INT 13
#define A3XX_INT_CP_IB1_INT 14
#define A3XX_INT_CP_RB_INT 15
#define A3XX_INT_CP_REG_PROTECT_FAULT 16
#define A3XX_INT_CP_RB_DONE_TS 17
#define A3XX_INT_CP_VS_DONE_TS 18
#define A3XX_INT_CP_PS_DONE_TS 19
#define A3XX_INT_CACHE_FLUSH_TS 20
#define A3XX_INT_CP_AHB_ERROR_HALT 21
#define A3XX_INT_MISC_HANG_DETECT 24
#define A3XX_INT_UCHE_OOB_ACCESS 25
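/*
 * Illustrative sketch, not part of the original header: the values above are
 * bit positions within RBBM_INT_0, so an interrupt mask is formed by shifting
 * them. A3XX_INT_MASK_EXAMPLE is a hypothetical name used only to show the
 * encoding, not a mask taken from the driver.
 */
#define A3XX_INT_MASK_EXAMPLE \
	((1 << A3XX_INT_RBBM_AHB_ERROR) | \
	 (1 << A3XX_INT_CP_HW_FAULT) | \
	 (1 << A3XX_INT_CACHE_FLUSH_TS) | \
	 (1 << A3XX_INT_MISC_HANG_DETECT))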
/* Register definitions */
#define A3XX_RBBM_CLOCK_CTL 0x010
#define A3XX_RBBM_SP_HYST_CNT 0x012
#define A3XX_RBBM_SW_RESET_CMD 0x018
#define A3XX_RBBM_AHB_CTL0 0x020
#define A3XX_RBBM_AHB_CTL1 0x021
#define A3XX_RBBM_AHB_CMD 0x022
#define A3XX_RBBM_AHB_ERROR_STATUS 0x027
#define A3XX_RBBM_GPR0_CTL 0x02E
/* This is the same register as on A2XX, just in a different place */
#define A3XX_RBBM_STATUS 0x030
#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33
#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50
#define A3XX_RBBM_INT_CLEAR_CMD 0x061
#define A3XX_RBBM_INT_0_MASK 0x063
#define A3XX_RBBM_INT_0_STATUS 0x064
#define A3XX_RBBM_PERFCTR_CTL 0x80
#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81
#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82
#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84
#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85
#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86
#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87
#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90
#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91
#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92
#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93
#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94
#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95
#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96
#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97
#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98
#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99
#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A
#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B
#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C
#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D
#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E
#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F
#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0
#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1
#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2
#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3
#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4
#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5
#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6
#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7
#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8
#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9
#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA
#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB
#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC
#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD
#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE
#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF
#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0
#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1
#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2
#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3
#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4
#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5
#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6
#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7
#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8
#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9
#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA
#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB
#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC
#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD
#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE
#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF
#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0
#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1
#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2
#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3
#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4
#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5
#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6
#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7
#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8
#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9
#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA
#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB
#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC
#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD
#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE
#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF
#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0
#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1
#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2
#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3
#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4
#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5
#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6
#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7
#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8
#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9
#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA
#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB
#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2
#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3
#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4
#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5
#define A3XX_RBBM_RBBM_CTL 0x100
#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA
#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB
#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
#define A3XX_RBBM_DEBUG_BUS_CTL 0x111
#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112
#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B
#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C
#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D
#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E
#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F
#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120
#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121
#define A3XX_RBBM_EXT_TRACE_CMD 0x122
#define A3XX_CP_RB_BASE 0x01C0
#define A3XX_CP_RB_CNTL 0x01C1
#define A3XX_CP_RB_RPTR 0x01C4
#define A3XX_CP_RB_WPTR 0x01C5
/* The following two are the same as on A2XX, just in a different place */
#define A3XX_CP_PFP_UCODE_ADDR 0x1C9
#define A3XX_CP_PFP_UCODE_DATA 0x1CA
#define A3XX_CP_ROQ_ADDR 0x1CC
#define A3XX_CP_ROQ_DATA 0x1CD
#define A3XX_CP_MERCIU_ADDR 0x1D1
#define A3XX_CP_MERCIU_DATA 0x1D2
#define A3XX_CP_MERCIU_DATA2 0x1D3
#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5
#define A3XX_CP_MEQ_ADDR 0x1DA
#define A3XX_CP_MEQ_DATA 0x1DB
#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC
#define A3XX_CP_STATE_DEBUG_DATA 0x01ED
#define A3XX_CP_CNTL 0x01F4
#define A3XX_CP_WFI_PEND_CTR 0x01F5
#define A3XX_CP_ME_CNTL 0x01F6
#define A3XX_CP_ME_STATUS 0x01F7
#define A3XX_CP_ME_RAM_WADDR 0x01F8
#define A3XX_CP_ME_RAM_RADDR 0x01F9
#define A3XX_CP_ME_RAM_DATA 0x01FA
#define A3XX_CP_DEBUG 0x01FC
#define A3XX_RBBM_PM_OVERRIDE2 0x039D
#define A3XX_CP_PERFCOUNTER_SELECT 0x445
#define A3XX_CP_IB1_BASE 0x0458
#define A3XX_CP_IB1_BUFSZ 0x0459
#define A3XX_CP_IB2_BASE 0x045A
#define A3XX_CP_IB2_BUFSZ 0x045B
#define A3XX_CP_HW_FAULT 0x45C
#define A3XX_CP_PROTECT_CTRL 0x45E
#define A3XX_CP_PROTECT_STATUS 0x45F
#define A3XX_CP_PROTECT_REG_0 0x460
#define A3XX_CP_STAT 0x047F
#define A3XX_CP_SCRATCH_REG0 0x578
#define A3XX_CP_SCRATCH_REG6 0x57E
#define A3XX_CP_SCRATCH_REG7 0x57F
#define A3XX_VSC_SIZE_ADDRESS 0xC02
#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07
#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08
#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A
#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B
#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D
#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E
#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10
#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11
#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13
#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14
#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16
#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17
#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19
#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A
#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48
#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81
#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B
#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3
#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4
#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5
#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6
#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7
#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8
#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9
#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA
#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB
#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC
#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD
#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE
#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF
#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0
#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1
#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2
#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3
#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4
#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5
#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
#define A3XX_RB_GMEM_BASE_ADDR 0xCC0
#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1
#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6
#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7
#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0
#define A3XX_SQ_GPR_MANAGEMENT 0x0D00
#define A3XX_SQ_INST_STORE_MANAGEMENT 0x0D02
#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00
#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01
#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02
#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
#define A3XX_TP0_CHICKEN 0x0E1E
#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64
#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65
#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84
#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85
#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86
#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87
#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88
#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89
#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1
#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6
#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4
#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5
#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6
#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7
#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8
#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04
#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05
#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06
#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07
#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08
#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09
#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049
#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A
#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B
#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C
#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D
#define A3XX_GRAS_SU_POINT_MINMAX 0x2068
#define A3XX_GRAS_SU_POINT_SIZE 0x2069
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C
#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D
#define A3XX_GRAS_SU_MODE_CONTROL 0x2070
#define A3XX_GRAS_SC_CONTROL 0x2072
#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074
#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075
#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079
#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A
#define A3XX_RB_MODE_CONTROL 0x20C0
#define A3XX_RB_RENDER_CONTROL 0x20C1
#define A3XX_RB_MSAA_CONTROL 0x20C2
#define A3XX_RB_ALPHA_REFERENCE 0x20C3
#define A3XX_RB_MRT_CONTROL0 0x20C4
#define A3XX_RB_MRT_BUF_INFO0 0x20C5
#define A3XX_RB_MRT_BUF_BASE0 0x20C6
#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7
#define A3XX_RB_MRT_CONTROL1 0x20C8
#define A3XX_RB_MRT_BUF_INFO1 0x20C9
#define A3XX_RB_MRT_BUF_BASE1 0x20CA
#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB
#define A3XX_RB_MRT_CONTROL2 0x20CC
#define A3XX_RB_MRT_BUF_INFO2 0x20CD
#define A3XX_RB_MRT_BUF_BASE2 0x20CE
#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF
#define A3XX_RB_MRT_CONTROL3 0x20D0
#define A3XX_RB_MRT_BUF_INFO3 0x20D1
#define A3XX_RB_MRT_BUF_BASE3 0x20D2
#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3
#define A3XX_RB_BLEND_RED 0x20E4
#define A3XX_RB_BLEND_GREEN 0x20E5
#define A3XX_RB_BLEND_BLUE 0x20E6
#define A3XX_RB_BLEND_ALPHA 0x20E7
#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8
#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9
#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA
#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB
#define A3XX_RB_COPY_CONTROL 0x20EC
#define A3XX_RB_COPY_DEST_BASE 0x20ED
#define A3XX_RB_COPY_DEST_PITCH 0x20EE
#define A3XX_RB_COPY_DEST_INFO 0x20EF
#define A3XX_RB_DEPTH_CONTROL 0x2100
#define A3XX_RB_DEPTH_CLEAR 0x2101
#define A3XX_RB_DEPTH_BUF_INFO 0x2102
#define A3XX_RB_DEPTH_BUF_PITCH 0x2103
#define A3XX_RB_STENCIL_CONTROL 0x2104
#define A3XX_RB_STENCIL_CLEAR 0x2105
#define A3XX_RB_STENCIL_BUF_INFO 0x2106
#define A3XX_RB_STENCIL_BUF_PITCH 0x2107
#define A3XX_RB_STENCIL_REF_MASK 0x2108
#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109
#define A3XX_RB_LRZ_VSC_CONTROL 0x210C
#define A3XX_RB_WINDOW_OFFSET 0x210E
#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110
#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111
#define A3XX_RB_Z_CLAMP_MIN 0x2114
#define A3XX_RB_Z_CLAMP_MAX 0x2115
#define A3XX_HLSQ_CONTROL_0_REG 0x2200
#define A3XX_HLSQ_CONTROL_1_REG 0x2201
#define A3XX_HLSQ_CONTROL_2_REG 0x2202
#define A3XX_HLSQ_CONTROL_3_REG 0x2203
#define A3XX_HLSQ_VS_CONTROL_REG 0x2204
#define A3XX_HLSQ_FS_CONTROL_REG 0x2205
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207
#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A
#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B
#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C
#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D
#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E
#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F
#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210
#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211
#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212
#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214
#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215
#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216
#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217
#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A
#define A3XX_VFD_FETCH_INSTR_1_0 0x2247
#define A3XX_VFD_FETCH_INSTR_1_1 0x2249
#define A3XX_VFD_FETCH_INSTR_1_2 0x224B
#define A3XX_VFD_FETCH_INSTR_1_3 0x224D
#define A3XX_VFD_FETCH_INSTR_1_4 0x224F
#define A3XX_VFD_FETCH_INSTR_1_5 0x2251
#define A3XX_VFD_FETCH_INSTR_1_6 0x2253
#define A3XX_VFD_FETCH_INSTR_1_7 0x2255
#define A3XX_VFD_FETCH_INSTR_1_8 0x2257
#define A3XX_VFD_FETCH_INSTR_1_9 0x2259
#define A3XX_VFD_FETCH_INSTR_1_A 0x225B
#define A3XX_VFD_FETCH_INSTR_1_B 0x225D
#define A3XX_VFD_FETCH_INSTR_1_C 0x225F
#define A3XX_VFD_FETCH_INSTR_1_D 0x2261
#define A3XX_VFD_FETCH_INSTR_1_E 0x2263
#define A3XX_VFD_FETCH_INSTR_1_F 0x2265
#define A3XX_SP_SP_CTRL_REG 0x22C0
#define A3XX_SP_VS_CTRL_REG0 0x22C4
#define A3XX_SP_VS_CTRL_REG1 0x22C5
#define A3XX_SP_VS_PARAM_REG 0x22C6
#define A3XX_SP_VS_OUT_REG_0 0x22C7
#define A3XX_SP_VS_OUT_REG_1 0x22C8
#define A3XX_SP_VS_OUT_REG_2 0x22C9
#define A3XX_SP_VS_OUT_REG_3 0x22CA
#define A3XX_SP_VS_OUT_REG_4 0x22CB
#define A3XX_SP_VS_OUT_REG_5 0x22CC
#define A3XX_SP_VS_OUT_REG_6 0x22CD
#define A3XX_SP_VS_OUT_REG_7 0x22CE
#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1
#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2
#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3
#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
#define A3XX_SP_VS_OBJ_START_REG 0x22D5
#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6
#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
#define A3XX_SP_VS_LENGTH_REG 0x22DF
#define A3XX_SP_FS_CTRL_REG0 0x22E0
#define A3XX_SP_FS_CTRL_REG1 0x22E1
#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
#define A3XX_SP_FS_OBJ_START_REG 0x22E3
#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4
#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9
#define A3XX_SP_FS_OUTPUT_REG 0x22EC
#define A3XX_SP_FS_MRT_REG_0 0x22F0
#define A3XX_SP_FS_MRT_REG_1 0x22F1
#define A3XX_SP_FS_MRT_REG_2 0x22F2
#define A3XX_SP_FS_MRT_REG_3 0x22F3
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7
#define A3XX_SP_FS_LENGTH_REG 0x22FF
#define A3XX_PA_SC_AA_CONFIG 0x2301
#define A3XX_VBIF_CLKON 0x3001
#define A3XX_VBIF_ABIT_SORT 0x301C
#define A3XX_VBIF_ABIT_SORT_CONF 0x301D
#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A
#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C
#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D
#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030
#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031
#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034
#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035
#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036
#define A3XX_VBIF_ARB_CTL 0x303C
#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049
#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E
#define A3XX_VBIF_OUT_AXI_AOOO 0x305F
#define A3XX_VBIF_PERF_CNT0_LO 0x3073
#define A3XX_VBIF_PERF_CNT0_HI 0x3074
#define A3XX_VBIF_PERF_CNT1_LO 0x3075
#define A3XX_VBIF_PERF_CNT1_HI 0x3076
#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077
#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078
#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079
#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a
#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b
#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c
#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080
#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F
#define A30X_VBIF_XIN_HALT_CTRL0_MASK 0x7
#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081
/* VBIF register offsets for A306 */
#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0
#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1
#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2
#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3
#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8
#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9
#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da
#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db
#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0
#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1
#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2
#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3
#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100
#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101
#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102
#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110
#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111
#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112
#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118
#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119
#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a
#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800
#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801
/* RBBM Debug bus block IDs */
#define RBBM_BLOCK_ID_CP 0x1
#define RBBM_BLOCK_ID_RBBM 0x2
#define RBBM_BLOCK_ID_VBIF 0x3
#define RBBM_BLOCK_ID_HLSQ 0x4
#define RBBM_BLOCK_ID_UCHE 0x5
#define RBBM_BLOCK_ID_PC 0x8
#define RBBM_BLOCK_ID_VFD 0x9
#define RBBM_BLOCK_ID_VPC 0xa
#define RBBM_BLOCK_ID_TSE 0xb
#define RBBM_BLOCK_ID_RAS 0xc
#define RBBM_BLOCK_ID_VSC 0xd
#define RBBM_BLOCK_ID_SP_0 0x10
#define RBBM_BLOCK_ID_SP_1 0x11
#define RBBM_BLOCK_ID_SP_2 0x12
#define RBBM_BLOCK_ID_SP_3 0x13
#define RBBM_BLOCK_ID_TPL1_0 0x18
#define RBBM_BLOCK_ID_TPL1_1 0x19
#define RBBM_BLOCK_ID_TPL1_2 0x1a
#define RBBM_BLOCK_ID_TPL1_3 0x1b
#define RBBM_BLOCK_ID_RB_0 0x20
#define RBBM_BLOCK_ID_RB_1 0x21
#define RBBM_BLOCK_ID_RB_2 0x22
#define RBBM_BLOCK_ID_RB_3 0x23
#define RBBM_BLOCK_ID_MARB_0 0x28
#define RBBM_BLOCK_ID_MARB_1 0x29
#define RBBM_BLOCK_ID_MARB_2 0x2a
#define RBBM_BLOCK_ID_MARB_3 0x2b
/* RBBM_CLOCK_CTL default value */
#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA
#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF
#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF
#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000
#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x05515455
#define A310_RBBM_GPR0_CTL_DEFAULT 0x000000AA
/* COUNTABLE FOR SP PERFCOUNTER */
#define SP_ALU_ACTIVE_CYCLES 0x1D
#define SP0_ICL1_MISSES 0x1A
#define SP_FS_CFLOW_INSTRUCTIONS 0x0C
/* COUNTABLE FOR TSE PERFCOUNTER */
#define TSE_INPUT_PRIM_NUM 0x0
/* VBIF countables */
#define VBIF_AXI_TOTAL_BEATS 85
/* VBIF Recoverable HALT bit value */
#define VBIF_RECOVERABLE_HALT_CTRL 0x1
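/*
 * Illustrative sketch, not part of the original header: a countable such as
 * SP_ALU_ACTIVE_CYCLES is the value programmed into one of the
 * A3XX_SP_PERFCOUNTERn_SELECT registers, and the 64-bit result is then read
 * back from the matching A3XX_RBBM_PERFCTR_SP_n_LO/_HI pair. The helper below
 * only shows how the two 32-bit halves combine; it performs no register
 * access and is not taken from the driver.
 */
static inline unsigned long long a3xx_perfctr_value_example(unsigned int lo,
							    unsigned int hi)
{
	return ((unsigned long long)hi << 32) | lo;
}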
/*
* CP DEBUG settings for A3XX core:
* DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
* MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF
*/
#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
#endif


@@ -0,0 +1,902 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2014-2016,2019, The Linux Foundation. All rights reserved.
*/
#ifndef _A5XX_REG_H
#define _A5XX_REG_H
/* A5XX interrupt bits */
#define A5XX_INT_RBBM_GPU_IDLE 0
#define A5XX_INT_RBBM_AHB_ERROR 1
#define A5XX_INT_RBBM_TRANSFER_TIMEOUT 2
#define A5XX_INT_RBBM_ME_MS_TIMEOUT 3
#define A5XX_INT_RBBM_PFP_MS_TIMEOUT 4
#define A5XX_INT_RBBM_ETS_MS_TIMEOUT 5
#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6
#define A5XX_INT_RBBM_GPC_ERROR 7
#define A5XX_INT_CP_SW 8
#define A5XX_INT_CP_HW_ERROR 9
#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS 10
#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS 11
#define A5XX_INT_CP_CCU_RESOLVE_TS 12
#define A5XX_INT_CP_IB2 13
#define A5XX_INT_CP_IB1 14
#define A5XX_INT_CP_RB 15
#define A5XX_INT_CP_UNUSED_1 16
#define A5XX_INT_CP_RB_DONE_TS 17
#define A5XX_INT_CP_WT_DONE_TS 18
#define A5XX_INT_UNKNOWN_1 19
#define A5XX_INT_CP_CACHE_FLUSH_TS 20
#define A5XX_INT_UNUSED_2 21
#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW 22
#define A5XX_INT_MISC_HANG_DETECT 23
#define A5XX_INT_UCHE_OOB_ACCESS 24
#define A5XX_INT_UCHE_TRAP_INTR 25
#define A5XX_INT_DEBBUS_INTR_0 26
#define A5XX_INT_DEBBUS_INTR_1 27
#define A5XX_INT_GPMU_VOLTAGE_DROOP 28
#define A5XX_INT_GPMU_FIRMWARE 29
#define A5XX_INT_ISDB_CPU_IRQ 30
#define A5XX_INT_ISDB_UNDER_DEBUG 31
/* CP Interrupt bits */
#define A5XX_CP_OPCODE_ERROR 0
#define A5XX_CP_RESERVED_BIT_ERROR 1
#define A5XX_CP_HW_FAULT_ERROR 2
#define A5XX_CP_DMA_ERROR 3
#define A5XX_CP_REGISTER_PROTECTION_ERROR 4
#define A5XX_CP_AHB_ERROR 5
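/*
 * Illustrative sketch, not part of the original header: the CP interrupt bits
 * above are positions within A5XX_CP_INTERRUPT_STATUS, so a handler would
 * typically test them as shifted masks. a5xx_cp_status_has_hw_fault() is a
 * hypothetical helper shown only to illustrate the encoding.
 */
static inline int a5xx_cp_status_has_hw_fault(unsigned int status)
{
	return !!(status & (1 << A5XX_CP_HW_FAULT_ERROR));
}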
/* CP registers */
#define A5XX_CP_RB_BASE 0x800
#define A5XX_CP_RB_BASE_HI 0x801
#define A5XX_CP_RB_CNTL 0x802
#define A5XX_CP_RB_RPTR_ADDR_LO 0x804
#define A5XX_CP_RB_RPTR_ADDR_HI 0x805
#define A5XX_CP_RB_RPTR 0x806
#define A5XX_CP_RB_WPTR 0x807
#define A5XX_CP_PFP_STAT_ADDR 0x808
#define A5XX_CP_PFP_STAT_DATA 0x809
#define A5XX_CP_DRAW_STATE_ADDR 0x80B
#define A5XX_CP_DRAW_STATE_DATA 0x80C
#define A5XX_CP_CRASH_SCRIPT_BASE_LO 0x817
#define A5XX_CP_CRASH_SCRIPT_BASE_HI 0x818
#define A5XX_CP_CRASH_DUMP_CNTL 0x819
#define A5XX_CP_ME_STAT_ADDR 0x81A
#define A5XX_CP_ROQ_THRESHOLDS_1 0x81F
#define A5XX_CP_ROQ_THRESHOLDS_2 0x820
#define A5XX_CP_ROQ_DBG_ADDR 0x821
#define A5XX_CP_ROQ_DBG_DATA 0x822
#define A5XX_CP_MEQ_DBG_ADDR 0x823
#define A5XX_CP_MEQ_DBG_DATA 0x824
#define A5XX_CP_MEQ_THRESHOLDS 0x825
#define A5XX_CP_MERCIU_SIZE 0x826
#define A5XX_CP_MERCIU_DBG_ADDR 0x827
#define A5XX_CP_MERCIU_DBG_DATA_1 0x828
#define A5XX_CP_MERCIU_DBG_DATA_2 0x829
#define A5XX_CP_PFP_UCODE_DBG_ADDR 0x82A
#define A5XX_CP_PFP_UCODE_DBG_DATA 0x82B
#define A5XX_CP_ME_UCODE_DBG_ADDR 0x82F
#define A5XX_CP_ME_UCODE_DBG_DATA 0x830
#define A5XX_CP_CNTL 0x831
#define A5XX_CP_ME_CNTL 0x832
#define A5XX_CP_CHICKEN_DBG 0x833
#define A5XX_CP_PFP_INSTR_BASE_LO 0x835
#define A5XX_CP_PFP_INSTR_BASE_HI 0x836
#define A5XX_CP_PM4_INSTR_BASE_LO 0x838
#define A5XX_CP_PM4_INSTR_BASE_HI 0x839
#define A5XX_CP_CONTEXT_SWITCH_CNTL 0x83B
#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO 0x83C
#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI 0x83D
#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO 0x83E
#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI 0x83F
#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO 0x840
#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI 0x841
#define A5XX_CP_ADDR_MODE_CNTL 0x860
#define A5XX_CP_ME_STAT_DATA 0xB14
#define A5XX_CP_WFI_PEND_CTR 0xB15
#define A5XX_CP_INTERRUPT_STATUS 0xB18
#define A5XX_CP_HW_FAULT 0xB1A
#define A5XX_CP_PROTECT_STATUS 0xB1C
#define A5XX_CP_IB1_BASE 0xB1F
#define A5XX_CP_IB1_BASE_HI 0xB20
#define A5XX_CP_IB1_BUFSZ 0xB21
#define A5XX_CP_IB2_BASE 0xB22
#define A5XX_CP_IB2_BASE_HI 0xB23
#define A5XX_CP_IB2_BUFSZ 0xB24
#define A5XX_CP_PROTECT_REG_0 0x880
#define A5XX_CP_PROTECT_CNTL 0x8A0
#define A5XX_CP_AHB_FAULT 0xB1B
#define A5XX_CP_PERFCTR_CP_SEL_0 0xBB0
#define A5XX_CP_PERFCTR_CP_SEL_1 0xBB1
#define A5XX_CP_PERFCTR_CP_SEL_2 0xBB2
#define A5XX_CP_PERFCTR_CP_SEL_3 0xBB3
#define A5XX_CP_PERFCTR_CP_SEL_4 0xBB4
#define A5XX_CP_PERFCTR_CP_SEL_5 0xBB5
#define A5XX_CP_PERFCTR_CP_SEL_6 0xBB6
#define A5XX_CP_PERFCTR_CP_SEL_7 0xBB7
#define A5XX_VSC_ADDR_MODE_CNTL 0xBC1
/* CP Power Counter Registers Select */
#define A5XX_CP_POWERCTR_CP_SEL_0 0xBBA
#define A5XX_CP_POWERCTR_CP_SEL_1 0xBBB
#define A5XX_CP_POWERCTR_CP_SEL_2 0xBBC
#define A5XX_CP_POWERCTR_CP_SEL_3 0xBBD
/* RBBM registers */
#define A5XX_RBBM_CFG_DBGBUS_SEL_A 0x4
#define A5XX_RBBM_CFG_DBGBUS_SEL_B 0x5
#define A5XX_RBBM_CFG_DBGBUS_SEL_C 0x6
#define A5XX_RBBM_CFG_DBGBUS_SEL_D 0x7
#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT 0x0
#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT 0x8
#define A5XX_RBBM_CFG_DBGBUS_CNTLT 0x8
#define A5XX_RBBM_CFG_DBGBUS_CNTLM 0x9
#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x18
#define A5XX_RBBM_CFG_DBGBUS_OPL 0xA
#define A5XX_RBBM_CFG_DBGBUS_OPE 0xB
#define A5XX_RBBM_CFG_DBGBUS_IVTL_0 0xC
#define A5XX_RBBM_CFG_DBGBUS_IVTL_1 0xD
#define A5XX_RBBM_CFG_DBGBUS_IVTL_2 0xE
#define A5XX_RBBM_CFG_DBGBUS_IVTL_3 0xF
#define A5XX_RBBM_CFG_DBGBUS_MASKL_0 0x10
#define A5XX_RBBM_CFG_DBGBUS_MASKL_1 0x11
#define A5XX_RBBM_CFG_DBGBUS_MASKL_2 0x12
#define A5XX_RBBM_CFG_DBGBUS_MASKL_3 0x13
#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0 0x14
#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1 0x15
#define A5XX_RBBM_CFG_DBGBUS_IVTE_0 0x16
#define A5XX_RBBM_CFG_DBGBUS_IVTE_1 0x17
#define A5XX_RBBM_CFG_DBGBUS_IVTE_2 0x18
#define A5XX_RBBM_CFG_DBGBUS_IVTE_3 0x19
#define A5XX_RBBM_CFG_DBGBUS_MASKE_0 0x1A
#define A5XX_RBBM_CFG_DBGBUS_MASKE_1 0x1B
#define A5XX_RBBM_CFG_DBGBUS_MASKE_2 0x1C
#define A5XX_RBBM_CFG_DBGBUS_MASKE_3 0x1D
#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE 0x1E
#define A5XX_RBBM_CFG_DBGBUS_PTRC0 0x1F
#define A5XX_RBBM_CFG_DBGBUS_PTRC1 0x20
#define A5XX_RBBM_CFG_DBGBUS_LOADREG 0x21
#define A5XX_RBBM_CFG_DBGBUS_IDX 0x22
#define A5XX_RBBM_CFG_DBGBUS_CLRC 0x23
#define A5XX_RBBM_CFG_DBGBUS_LOADIVT 0x24
#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL 0x2F
#define A5XX_RBBM_INT_CLEAR_CMD 0x37
#define A5XX_RBBM_INT_0_MASK 0x38
#define A5XX_RBBM_AHB_DBG_CNTL 0x3F
#define A5XX_RBBM_EXT_VBIF_DBG_CNTL 0x41
#define A5XX_RBBM_SW_RESET_CMD 0x43
#define A5XX_RBBM_BLOCK_SW_RESET_CMD 0x45
#define A5XX_RBBM_BLOCK_SW_RESET_CMD2 0x46
#define A5XX_RBBM_DBG_LO_HI_GPIO 0x48
#define A5XX_RBBM_EXT_TRACE_BUS_CNTL 0x49
#define A5XX_RBBM_CLOCK_CNTL_TP0 0x4A
#define A5XX_RBBM_CLOCK_CNTL_TP1 0x4B
#define A5XX_RBBM_CLOCK_CNTL_TP2 0x4C
#define A5XX_RBBM_CLOCK_CNTL_TP3 0x4D
#define A5XX_RBBM_CLOCK_CNTL2_TP0 0x4E
#define A5XX_RBBM_CLOCK_CNTL2_TP1 0x4F
#define A5XX_RBBM_CLOCK_CNTL2_TP2 0x50
#define A5XX_RBBM_CLOCK_CNTL2_TP3 0x51
#define A5XX_RBBM_CLOCK_CNTL3_TP0 0x52
#define A5XX_RBBM_CLOCK_CNTL3_TP1 0x53
#define A5XX_RBBM_CLOCK_CNTL3_TP2 0x54
#define A5XX_RBBM_CLOCK_CNTL3_TP3 0x55
#define A5XX_RBBM_READ_AHB_THROUGH_DBG 0x59
#define A5XX_RBBM_CLOCK_CNTL_UCHE 0x5A
#define A5XX_RBBM_CLOCK_CNTL2_UCHE 0x5B
#define A5XX_RBBM_CLOCK_CNTL3_UCHE 0x5C
#define A5XX_RBBM_CLOCK_CNTL4_UCHE 0x5D
#define A5XX_RBBM_CLOCK_HYST_UCHE 0x5E
#define A5XX_RBBM_CLOCK_DELAY_UCHE 0x5F
#define A5XX_RBBM_CLOCK_MODE_GPC 0x60
#define A5XX_RBBM_CLOCK_DELAY_GPC 0x61
#define A5XX_RBBM_CLOCK_HYST_GPC 0x62
#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM 0x63
#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x64
#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x65
#define A5XX_RBBM_CLOCK_DELAY_HLSQ 0x66
#define A5XX_RBBM_CLOCK_CNTL 0x67
#define A5XX_RBBM_CLOCK_CNTL_SP0 0x68
#define A5XX_RBBM_CLOCK_CNTL_SP1 0x69
#define A5XX_RBBM_CLOCK_CNTL_SP2 0x6A
#define A5XX_RBBM_CLOCK_CNTL_SP3 0x6B
#define A5XX_RBBM_CLOCK_CNTL2_SP0 0x6C
#define A5XX_RBBM_CLOCK_CNTL2_SP1 0x6D
#define A5XX_RBBM_CLOCK_CNTL2_SP2 0x6E
#define A5XX_RBBM_CLOCK_CNTL2_SP3 0x6F
#define A5XX_RBBM_CLOCK_HYST_SP0 0x70
#define A5XX_RBBM_CLOCK_HYST_SP1 0x71
#define A5XX_RBBM_CLOCK_HYST_SP2 0x72
#define A5XX_RBBM_CLOCK_HYST_SP3 0x73
#define A5XX_RBBM_CLOCK_DELAY_SP0 0x74
#define A5XX_RBBM_CLOCK_DELAY_SP1 0x75
#define A5XX_RBBM_CLOCK_DELAY_SP2 0x76
#define A5XX_RBBM_CLOCK_DELAY_SP3 0x77
#define A5XX_RBBM_CLOCK_CNTL_RB0 0x78
#define A5XX_RBBM_CLOCK_CNTL_RB1 0x79
#define A5XX_RBBM_CLOCK_CNTL_RB2 0x7a
#define A5XX_RBBM_CLOCK_CNTL_RB3 0x7B
#define A5XX_RBBM_CLOCK_CNTL2_RB0 0x7C
#define A5XX_RBBM_CLOCK_CNTL2_RB1 0x7D
#define A5XX_RBBM_CLOCK_CNTL2_RB2 0x7E
#define A5XX_RBBM_CLOCK_CNTL2_RB3 0x7F
#define A5XX_RBBM_CLOCK_HYST_RAC 0x80
#define A5XX_RBBM_CLOCK_DELAY_RAC 0x81
#define A5XX_RBBM_CLOCK_CNTL_CCU0 0x82
#define A5XX_RBBM_CLOCK_CNTL_CCU1 0x83
#define A5XX_RBBM_CLOCK_CNTL_CCU2 0x84
#define A5XX_RBBM_CLOCK_CNTL_CCU3 0x85
#define A5XX_RBBM_CLOCK_HYST_RB_CCU0 0x86
#define A5XX_RBBM_CLOCK_HYST_RB_CCU1 0x87
#define A5XX_RBBM_CLOCK_HYST_RB_CCU2 0x88
#define A5XX_RBBM_CLOCK_HYST_RB_CCU3 0x89
#define A5XX_RBBM_CLOCK_CNTL_RAC 0x8A
#define A5XX_RBBM_CLOCK_CNTL2_RAC 0x8B
#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0 0x8C
#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1 0x8D
#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2 0x8E
#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3 0x8F
#define A5XX_RBBM_CLOCK_HYST_VFD 0x90
#define A5XX_RBBM_CLOCK_MODE_VFD 0x91
#define A5XX_RBBM_CLOCK_DELAY_VFD 0x92
#define A5XX_RBBM_AHB_CNTL0 0x93
#define A5XX_RBBM_AHB_CNTL1 0x94
#define A5XX_RBBM_AHB_CNTL2 0x95
#define A5XX_RBBM_AHB_CMD 0x96
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11 0x9C
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12 0x9D
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13 0x9E
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14 0x9F
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15 0xA0
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16 0xA1
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17 0xA2
#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18 0xA3
#define A5XX_RBBM_CLOCK_DELAY_TP0 0xA4
#define A5XX_RBBM_CLOCK_DELAY_TP1 0xA5
#define A5XX_RBBM_CLOCK_DELAY_TP2 0xA6
#define A5XX_RBBM_CLOCK_DELAY_TP3 0xA7
#define A5XX_RBBM_CLOCK_DELAY2_TP0 0xA8
#define A5XX_RBBM_CLOCK_DELAY2_TP1 0xA9
#define A5XX_RBBM_CLOCK_DELAY2_TP2 0xAA
#define A5XX_RBBM_CLOCK_DELAY2_TP3 0xAB
#define A5XX_RBBM_CLOCK_DELAY3_TP0 0xAC
#define A5XX_RBBM_CLOCK_DELAY3_TP1 0xAD
#define A5XX_RBBM_CLOCK_DELAY3_TP2 0xAE
#define A5XX_RBBM_CLOCK_DELAY3_TP3 0xAF
#define A5XX_RBBM_CLOCK_HYST_TP0 0xB0
#define A5XX_RBBM_CLOCK_HYST_TP1 0xB1
#define A5XX_RBBM_CLOCK_HYST_TP2 0xB2
#define A5XX_RBBM_CLOCK_HYST_TP3 0xB3
#define A5XX_RBBM_CLOCK_HYST2_TP0 0xB4
#define A5XX_RBBM_CLOCK_HYST2_TP1 0xB5
#define A5XX_RBBM_CLOCK_HYST2_TP2 0xB6
#define A5XX_RBBM_CLOCK_HYST2_TP3 0xB7
#define A5XX_RBBM_CLOCK_HYST3_TP0 0xB8
#define A5XX_RBBM_CLOCK_HYST3_TP1 0xB9
#define A5XX_RBBM_CLOCK_HYST3_TP2 0xBA
#define A5XX_RBBM_CLOCK_HYST3_TP3 0xBB
#define A5XX_RBBM_CLOCK_CNTL_GPMU 0xC8
#define A5XX_RBBM_CLOCK_DELAY_GPMU 0xC9
#define A5XX_RBBM_CLOCK_HYST_GPMU 0xCA
#define A5XX_RBBM_PERFCTR_CP_0_LO 0x3A0
#define A5XX_RBBM_PERFCTR_CP_0_HI 0x3A1
#define A5XX_RBBM_PERFCTR_CP_1_LO 0x3A2
#define A5XX_RBBM_PERFCTR_CP_1_HI 0x3A3
#define A5XX_RBBM_PERFCTR_CP_2_LO 0x3A4
#define A5XX_RBBM_PERFCTR_CP_2_HI 0x3A5
#define A5XX_RBBM_PERFCTR_CP_3_LO 0x3A6
#define A5XX_RBBM_PERFCTR_CP_3_HI 0x3A7
#define A5XX_RBBM_PERFCTR_CP_4_LO 0x3A8
#define A5XX_RBBM_PERFCTR_CP_4_HI 0x3A9
#define A5XX_RBBM_PERFCTR_CP_5_LO 0x3AA
#define A5XX_RBBM_PERFCTR_CP_5_HI 0x3AB
#define A5XX_RBBM_PERFCTR_CP_6_LO 0x3AC
#define A5XX_RBBM_PERFCTR_CP_6_HI 0x3AD
#define A5XX_RBBM_PERFCTR_CP_7_LO 0x3AE
#define A5XX_RBBM_PERFCTR_CP_7_HI 0x3AF
#define A5XX_RBBM_PERFCTR_RBBM_0_LO 0x3B0
#define A5XX_RBBM_PERFCTR_RBBM_0_HI 0x3B1
#define A5XX_RBBM_PERFCTR_RBBM_1_LO 0x3B2
#define A5XX_RBBM_PERFCTR_RBBM_1_HI 0x3B3
#define A5XX_RBBM_PERFCTR_RBBM_2_LO 0x3B4
#define A5XX_RBBM_PERFCTR_RBBM_2_HI 0x3B5
#define A5XX_RBBM_PERFCTR_RBBM_3_LO 0x3B6
#define A5XX_RBBM_PERFCTR_RBBM_3_HI 0x3B7
#define A5XX_RBBM_PERFCTR_PC_0_LO 0x3B8
#define A5XX_RBBM_PERFCTR_PC_0_HI 0x3B9
#define A5XX_RBBM_PERFCTR_PC_1_LO 0x3BA
#define A5XX_RBBM_PERFCTR_PC_1_HI 0x3BB
#define A5XX_RBBM_PERFCTR_PC_2_LO 0x3BC
#define A5XX_RBBM_PERFCTR_PC_2_HI 0x3BD
#define A5XX_RBBM_PERFCTR_PC_3_LO 0x3BE
#define A5XX_RBBM_PERFCTR_PC_3_HI 0x3BF
#define A5XX_RBBM_PERFCTR_PC_4_LO 0x3C0
#define A5XX_RBBM_PERFCTR_PC_4_HI 0x3C1
#define A5XX_RBBM_PERFCTR_PC_5_LO 0x3C2
#define A5XX_RBBM_PERFCTR_PC_5_HI 0x3C3
#define A5XX_RBBM_PERFCTR_PC_6_LO 0x3C4
#define A5XX_RBBM_PERFCTR_PC_6_HI 0x3C5
#define A5XX_RBBM_PERFCTR_PC_7_LO 0x3C6
#define A5XX_RBBM_PERFCTR_PC_7_HI 0x3C7
#define A5XX_RBBM_PERFCTR_VFD_0_LO 0x3C8
#define A5XX_RBBM_PERFCTR_VFD_0_HI 0x3C9
#define A5XX_RBBM_PERFCTR_VFD_1_LO 0x3CA
#define A5XX_RBBM_PERFCTR_VFD_1_HI 0x3CB
#define A5XX_RBBM_PERFCTR_VFD_2_LO 0x3CC
#define A5XX_RBBM_PERFCTR_VFD_2_HI 0x3CD
#define A5XX_RBBM_PERFCTR_VFD_3_LO 0x3CE
#define A5XX_RBBM_PERFCTR_VFD_3_HI 0x3CF
#define A5XX_RBBM_PERFCTR_VFD_4_LO 0x3D0
#define A5XX_RBBM_PERFCTR_VFD_4_HI 0x3D1
#define A5XX_RBBM_PERFCTR_VFD_5_LO 0x3D2
#define A5XX_RBBM_PERFCTR_VFD_5_HI 0x3D3
#define A5XX_RBBM_PERFCTR_VFD_6_LO 0x3D4
#define A5XX_RBBM_PERFCTR_VFD_6_HI 0x3D5
#define A5XX_RBBM_PERFCTR_VFD_7_LO 0x3D6
#define A5XX_RBBM_PERFCTR_VFD_7_HI 0x3D7
#define A5XX_RBBM_PERFCTR_HLSQ_0_LO 0x3D8
#define A5XX_RBBM_PERFCTR_HLSQ_0_HI 0x3D9
#define A5XX_RBBM_PERFCTR_HLSQ_1_LO 0x3DA
#define A5XX_RBBM_PERFCTR_HLSQ_1_HI 0x3DB
#define A5XX_RBBM_PERFCTR_HLSQ_2_LO 0x3DC
#define A5XX_RBBM_PERFCTR_HLSQ_2_HI 0x3DD
#define A5XX_RBBM_PERFCTR_HLSQ_3_LO 0x3DE
#define A5XX_RBBM_PERFCTR_HLSQ_3_HI 0x3DF
#define A5XX_RBBM_PERFCTR_HLSQ_4_LO 0x3E0
#define A5XX_RBBM_PERFCTR_HLSQ_4_HI 0x3E1
#define A5XX_RBBM_PERFCTR_HLSQ_5_LO 0x3E2
#define A5XX_RBBM_PERFCTR_HLSQ_5_HI 0x3E3
#define A5XX_RBBM_PERFCTR_HLSQ_6_LO 0x3E4
#define A5XX_RBBM_PERFCTR_HLSQ_6_HI 0x3E5
#define A5XX_RBBM_PERFCTR_HLSQ_7_LO 0x3E6
#define A5XX_RBBM_PERFCTR_HLSQ_7_HI 0x3E7
#define A5XX_RBBM_PERFCTR_VPC_0_LO 0x3E8
#define A5XX_RBBM_PERFCTR_VPC_0_HI 0x3E9
#define A5XX_RBBM_PERFCTR_VPC_1_LO 0x3EA
#define A5XX_RBBM_PERFCTR_VPC_1_HI 0x3EB
#define A5XX_RBBM_PERFCTR_VPC_2_LO 0x3EC
#define A5XX_RBBM_PERFCTR_VPC_2_HI 0x3ED
#define A5XX_RBBM_PERFCTR_VPC_3_LO 0x3EE
#define A5XX_RBBM_PERFCTR_VPC_3_HI 0x3EF
#define A5XX_RBBM_PERFCTR_CCU_0_LO 0x3F0
#define A5XX_RBBM_PERFCTR_CCU_0_HI 0x3F1
#define A5XX_RBBM_PERFCTR_CCU_1_LO 0x3F2
#define A5XX_RBBM_PERFCTR_CCU_1_HI 0x3F3
#define A5XX_RBBM_PERFCTR_CCU_2_LO 0x3F4
#define A5XX_RBBM_PERFCTR_CCU_2_HI 0x3F5
#define A5XX_RBBM_PERFCTR_CCU_3_LO 0x3F6
#define A5XX_RBBM_PERFCTR_CCU_3_HI 0x3F7
#define A5XX_RBBM_PERFCTR_TSE_0_LO 0x3F8
#define A5XX_RBBM_PERFCTR_TSE_0_HI 0x3F9
#define A5XX_RBBM_PERFCTR_TSE_1_LO 0x3FA
#define A5XX_RBBM_PERFCTR_TSE_1_HI 0x3FB
#define A5XX_RBBM_PERFCTR_TSE_2_LO 0x3FC
#define A5XX_RBBM_PERFCTR_TSE_2_HI 0x3FD
#define A5XX_RBBM_PERFCTR_TSE_3_LO 0x3FE
#define A5XX_RBBM_PERFCTR_TSE_3_HI 0x3FF
#define A5XX_RBBM_PERFCTR_RAS_0_LO 0x400
#define A5XX_RBBM_PERFCTR_RAS_0_HI 0x401
#define A5XX_RBBM_PERFCTR_RAS_1_LO 0x402
#define A5XX_RBBM_PERFCTR_RAS_1_HI 0x403
#define A5XX_RBBM_PERFCTR_RAS_2_LO 0x404
#define A5XX_RBBM_PERFCTR_RAS_2_HI 0x405
#define A5XX_RBBM_PERFCTR_RAS_3_LO 0x406
#define A5XX_RBBM_PERFCTR_RAS_3_HI 0x407
#define A5XX_RBBM_PERFCTR_UCHE_0_LO 0x408
#define A5XX_RBBM_PERFCTR_UCHE_0_HI 0x409
#define A5XX_RBBM_PERFCTR_UCHE_1_LO 0x40A
#define A5XX_RBBM_PERFCTR_UCHE_1_HI 0x40B
#define A5XX_RBBM_PERFCTR_UCHE_2_LO 0x40C
#define A5XX_RBBM_PERFCTR_UCHE_2_HI 0x40D
#define A5XX_RBBM_PERFCTR_UCHE_3_LO 0x40E
#define A5XX_RBBM_PERFCTR_UCHE_3_HI 0x40F
#define A5XX_RBBM_PERFCTR_UCHE_4_LO 0x410
#define A5XX_RBBM_PERFCTR_UCHE_4_HI 0x411
#define A5XX_RBBM_PERFCTR_UCHE_5_LO 0x412
#define A5XX_RBBM_PERFCTR_UCHE_5_HI 0x413
#define A5XX_RBBM_PERFCTR_UCHE_6_LO 0x414
#define A5XX_RBBM_PERFCTR_UCHE_6_HI 0x415
#define A5XX_RBBM_PERFCTR_UCHE_7_LO 0x416
#define A5XX_RBBM_PERFCTR_UCHE_7_HI 0x417
#define A5XX_RBBM_PERFCTR_TP_0_LO 0x418
#define A5XX_RBBM_PERFCTR_TP_0_HI 0x419
#define A5XX_RBBM_PERFCTR_TP_1_LO 0x41A
#define A5XX_RBBM_PERFCTR_TP_1_HI 0x41B
#define A5XX_RBBM_PERFCTR_TP_2_LO 0x41C
#define A5XX_RBBM_PERFCTR_TP_2_HI 0x41D
#define A5XX_RBBM_PERFCTR_TP_3_LO 0x41E
#define A5XX_RBBM_PERFCTR_TP_3_HI 0x41F
#define A5XX_RBBM_PERFCTR_TP_4_LO 0x420
#define A5XX_RBBM_PERFCTR_TP_4_HI 0x421
#define A5XX_RBBM_PERFCTR_TP_5_LO 0x422
#define A5XX_RBBM_PERFCTR_TP_5_HI 0x423
#define A5XX_RBBM_PERFCTR_TP_6_LO 0x424
#define A5XX_RBBM_PERFCTR_TP_6_HI 0x425
#define A5XX_RBBM_PERFCTR_TP_7_LO 0x426
#define A5XX_RBBM_PERFCTR_TP_7_HI 0x427
#define A5XX_RBBM_PERFCTR_SP_0_LO 0x428
#define A5XX_RBBM_PERFCTR_SP_0_HI 0x429
#define A5XX_RBBM_PERFCTR_SP_1_LO 0x42A
#define A5XX_RBBM_PERFCTR_SP_1_HI 0x42B
#define A5XX_RBBM_PERFCTR_SP_2_LO 0x42C
#define A5XX_RBBM_PERFCTR_SP_2_HI 0x42D
#define A5XX_RBBM_PERFCTR_SP_3_LO 0x42E
#define A5XX_RBBM_PERFCTR_SP_3_HI 0x42F
#define A5XX_RBBM_PERFCTR_SP_4_LO 0x430
#define A5XX_RBBM_PERFCTR_SP_4_HI 0x431
#define A5XX_RBBM_PERFCTR_SP_5_LO 0x432
#define A5XX_RBBM_PERFCTR_SP_5_HI 0x433
#define A5XX_RBBM_PERFCTR_SP_6_LO 0x434
#define A5XX_RBBM_PERFCTR_SP_6_HI 0x435
#define A5XX_RBBM_PERFCTR_SP_7_LO 0x436
#define A5XX_RBBM_PERFCTR_SP_7_HI 0x437
#define A5XX_RBBM_PERFCTR_SP_8_LO 0x438
#define A5XX_RBBM_PERFCTR_SP_8_HI 0x439
#define A5XX_RBBM_PERFCTR_SP_9_LO 0x43A
#define A5XX_RBBM_PERFCTR_SP_9_HI 0x43B
#define A5XX_RBBM_PERFCTR_SP_10_LO 0x43C
#define A5XX_RBBM_PERFCTR_SP_10_HI 0x43D
#define A5XX_RBBM_PERFCTR_SP_11_LO 0x43E
#define A5XX_RBBM_PERFCTR_SP_11_HI 0x43F
#define A5XX_RBBM_PERFCTR_RB_0_LO 0x440
#define A5XX_RBBM_PERFCTR_RB_0_HI 0x441
#define A5XX_RBBM_PERFCTR_RB_1_LO 0x442
#define A5XX_RBBM_PERFCTR_RB_1_HI 0x443
#define A5XX_RBBM_PERFCTR_RB_2_LO 0x444
#define A5XX_RBBM_PERFCTR_RB_2_HI 0x445
#define A5XX_RBBM_PERFCTR_RB_3_LO 0x446
#define A5XX_RBBM_PERFCTR_RB_3_HI 0x447
#define A5XX_RBBM_PERFCTR_RB_4_LO 0x448
#define A5XX_RBBM_PERFCTR_RB_4_HI 0x449
#define A5XX_RBBM_PERFCTR_RB_5_LO 0x44A
#define A5XX_RBBM_PERFCTR_RB_5_HI 0x44B
#define A5XX_RBBM_PERFCTR_RB_6_LO 0x44C
#define A5XX_RBBM_PERFCTR_RB_6_HI 0x44D
#define A5XX_RBBM_PERFCTR_RB_7_LO 0x44E
#define A5XX_RBBM_PERFCTR_RB_7_HI 0x44F
#define A5XX_RBBM_PERFCTR_VSC_0_LO 0x450
#define A5XX_RBBM_PERFCTR_VSC_0_HI 0x451
#define A5XX_RBBM_PERFCTR_VSC_1_LO 0x452
#define A5XX_RBBM_PERFCTR_VSC_1_HI 0x453
#define A5XX_RBBM_PERFCTR_LRZ_0_LO 0x454
#define A5XX_RBBM_PERFCTR_LRZ_0_HI 0x455
#define A5XX_RBBM_PERFCTR_LRZ_1_LO 0x456
#define A5XX_RBBM_PERFCTR_LRZ_1_HI 0x457
#define A5XX_RBBM_PERFCTR_LRZ_2_LO 0x458
#define A5XX_RBBM_PERFCTR_LRZ_2_HI 0x459
#define A5XX_RBBM_PERFCTR_LRZ_3_LO 0x45A
#define A5XX_RBBM_PERFCTR_LRZ_3_HI 0x45B
#define A5XX_RBBM_PERFCTR_CMP_0_LO 0x45C
#define A5XX_RBBM_PERFCTR_CMP_0_HI 0x45D
#define A5XX_RBBM_PERFCTR_CMP_1_LO 0x45E
#define A5XX_RBBM_PERFCTR_CMP_1_HI 0x45F
#define A5XX_RBBM_PERFCTR_CMP_2_LO 0x460
#define A5XX_RBBM_PERFCTR_CMP_2_HI 0x461
#define A5XX_RBBM_PERFCTR_CMP_3_LO 0x462
#define A5XX_RBBM_PERFCTR_CMP_3_HI 0x463
#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B
#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C
#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D
#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E
#define A5XX_RBBM_ALWAYSON_COUNTER_LO 0x4D2
#define A5XX_RBBM_ALWAYSON_COUNTER_HI 0x4D3
#define A5XX_RBBM_STATUS 0x4F5
#define A5XX_RBBM_STATUS3 0x530
#define A5XX_RBBM_INT_0_STATUS 0x4E1
#define A5XX_RBBM_AHB_ME_SPLIT_STATUS 0x4F0
#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS 0x4F1
#define A5XX_RBBM_AHB_ERROR_STATUS 0x4F4
#define A5XX_RBBM_PERFCTR_CNTL 0x464
#define A5XX_RBBM_PERFCTR_LOAD_CMD0 0x465
#define A5XX_RBBM_PERFCTR_LOAD_CMD1 0x466
#define A5XX_RBBM_PERFCTR_LOAD_CMD2 0x467
#define A5XX_RBBM_PERFCTR_LOAD_CMD3 0x468
#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x469
#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x46A
#define A5XX_RBBM_PERFCTR_RBBM_SEL_0 0x46B
#define A5XX_RBBM_PERFCTR_RBBM_SEL_1 0x46C
#define A5XX_RBBM_PERFCTR_RBBM_SEL_2 0x46D
#define A5XX_RBBM_PERFCTR_RBBM_SEL_3 0x46E
#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED 0x46F
#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC 0x504
#define A5XX_RBBM_CFG_DBGBUS_OVER 0x505
#define A5XX_RBBM_CFG_DBGBUS_COUNT0 0x506
#define A5XX_RBBM_CFG_DBGBUS_COUNT1 0x507
#define A5XX_RBBM_CFG_DBGBUS_COUNT2 0x508
#define A5XX_RBBM_CFG_DBGBUS_COUNT3 0x509
#define A5XX_RBBM_CFG_DBGBUS_COUNT4 0x50A
#define A5XX_RBBM_CFG_DBGBUS_COUNT5 0x50B
#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR 0x50C
#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 0x50D
#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 0x50E
#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 0x50F
#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 0x510
#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 0x511
#define A5XX_RBBM_CFG_DBGBUS_MISR0 0x512
#define A5XX_RBBM_CFG_DBGBUS_MISR1 0x513
#define A5XX_RBBM_ISDB_CNT 0x533
#define A5XX_RBBM_SECVID_TRUST_CONFIG 0xF000
#define A5XX_RBBM_SECVID_TRUST_CNTL 0xF400
#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO 0xF800
#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI 0xF801
#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE 0xF802
#define A5XX_RBBM_SECVID_TSB_CNTL 0xF803
#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL 0xF810
/* VSC registers */
#define A5XX_VSC_PERFCTR_VSC_SEL_0 0xC60
#define A5XX_VSC_PERFCTR_VSC_SEL_1 0xC61
#define A5XX_GRAS_ADDR_MODE_CNTL 0xC81
/* TSE registers */
#define A5XX_GRAS_PERFCTR_TSE_SEL_0 0xC90
#define A5XX_GRAS_PERFCTR_TSE_SEL_1 0xC91
#define A5XX_GRAS_PERFCTR_TSE_SEL_2 0xC92
#define A5XX_GRAS_PERFCTR_TSE_SEL_3 0xC93
/* RAS registers */
#define A5XX_GRAS_PERFCTR_RAS_SEL_0 0xC94
#define A5XX_GRAS_PERFCTR_RAS_SEL_1 0xC95
#define A5XX_GRAS_PERFCTR_RAS_SEL_2 0xC96
#define A5XX_GRAS_PERFCTR_RAS_SEL_3 0xC97
/* LRZ registers */
#define A5XX_GRAS_PERFCTR_LRZ_SEL_0 0xC98
#define A5XX_GRAS_PERFCTR_LRZ_SEL_1 0xC99
#define A5XX_GRAS_PERFCTR_LRZ_SEL_2 0xC9A
#define A5XX_GRAS_PERFCTR_LRZ_SEL_3 0xC9B
/* RB registers */
#define A5XX_RB_DBG_ECO_CNT 0xCC4
#define A5XX_RB_ADDR_MODE_CNTL 0xCC5
#define A5XX_RB_MODE_CNTL 0xCC6
#define A5XX_RB_PERFCTR_RB_SEL_0 0xCD0
#define A5XX_RB_PERFCTR_RB_SEL_1 0xCD1
#define A5XX_RB_PERFCTR_RB_SEL_2 0xCD2
#define A5XX_RB_PERFCTR_RB_SEL_3 0xCD3
#define A5XX_RB_PERFCTR_RB_SEL_4 0xCD4
#define A5XX_RB_PERFCTR_RB_SEL_5 0xCD5
#define A5XX_RB_PERFCTR_RB_SEL_6 0xCD6
#define A5XX_RB_PERFCTR_RB_SEL_7 0xCD7
/* CCU registers */
#define A5XX_RB_PERFCTR_CCU_SEL_0 0xCD8
#define A5XX_RB_PERFCTR_CCU_SEL_1 0xCD9
#define A5XX_RB_PERFCTR_CCU_SEL_2 0xCDA
#define A5XX_RB_PERFCTR_CCU_SEL_3 0xCDB
/* RB Power Counter RB Registers Select */
#define A5XX_RB_POWERCTR_RB_SEL_0 0xCE0
#define A5XX_RB_POWERCTR_RB_SEL_1 0xCE1
#define A5XX_RB_POWERCTR_RB_SEL_2 0xCE2
#define A5XX_RB_POWERCTR_RB_SEL_3 0xCE3
/* RB Power Counter CCU Registers Select */
#define A5XX_RB_POWERCTR_CCU_SEL_0 0xCE4
#define A5XX_RB_POWERCTR_CCU_SEL_1 0xCE5
/* CMP registers */
#define A5XX_RB_PERFCTR_CMP_SEL_0 0xCEC
#define A5XX_RB_PERFCTR_CMP_SEL_1 0xCED
#define A5XX_RB_PERFCTR_CMP_SEL_2 0xCEE
#define A5XX_RB_PERFCTR_CMP_SEL_3 0xCEF
/* PC registers */
#define A5XX_PC_DBG_ECO_CNTL 0xD00
#define A5XX_PC_ADDR_MODE_CNTL 0xD01
#define A5XX_PC_PERFCTR_PC_SEL_0 0xD10
#define A5XX_PC_PERFCTR_PC_SEL_1 0xD11
#define A5XX_PC_PERFCTR_PC_SEL_2 0xD12
#define A5XX_PC_PERFCTR_PC_SEL_3 0xD13
#define A5XX_PC_PERFCTR_PC_SEL_4 0xD14
#define A5XX_PC_PERFCTR_PC_SEL_5 0xD15
#define A5XX_PC_PERFCTR_PC_SEL_6 0xD16
#define A5XX_PC_PERFCTR_PC_SEL_7 0xD17
/* HLSQ registers */
#define A5XX_HLSQ_DBG_ECO_CNTL 0xE04
#define A5XX_HLSQ_ADDR_MODE_CNTL 0xE05
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 0xE10
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 0xE11
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 0xE12
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 0xE13
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 0xE14
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 0xE15
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 0xE16
#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 0xE17
#define A5XX_HLSQ_DBG_READ_SEL 0xBC00
#define A5XX_HLSQ_DBG_AHB_READ_APERTURE 0xA000
/* VFD registers */
#define A5XX_VFD_ADDR_MODE_CNTL 0xE41
#define A5XX_VFD_PERFCTR_VFD_SEL_0 0xE50
#define A5XX_VFD_PERFCTR_VFD_SEL_1 0xE51
#define A5XX_VFD_PERFCTR_VFD_SEL_2 0xE52
#define A5XX_VFD_PERFCTR_VFD_SEL_3 0xE53
#define A5XX_VFD_PERFCTR_VFD_SEL_4 0xE54
#define A5XX_VFD_PERFCTR_VFD_SEL_5 0xE55
#define A5XX_VFD_PERFCTR_VFD_SEL_6 0xE56
#define A5XX_VFD_PERFCTR_VFD_SEL_7 0xE57
/* VPC registers */
#define A5XX_VPC_DBG_ECO_CNTL 0xE60
#define A5XX_VPC_ADDR_MODE_CNTL 0xE61
#define A5XX_VPC_PERFCTR_VPC_SEL_0 0xE64
#define A5XX_VPC_PERFCTR_VPC_SEL_1 0xE65
#define A5XX_VPC_PERFCTR_VPC_SEL_2 0xE66
#define A5XX_VPC_PERFCTR_VPC_SEL_3 0xE67
/* UCHE registers */
#define A5XX_UCHE_ADDR_MODE_CNTL 0xE80
#define A5XX_UCHE_MODE_CNTL 0xE81
#define A5XX_UCHE_WRITE_THRU_BASE_LO 0xE87
#define A5XX_UCHE_WRITE_THRU_BASE_HI 0xE88
#define A5XX_UCHE_TRAP_BASE_LO 0xE89
#define A5XX_UCHE_TRAP_BASE_HI 0xE8A
#define A5XX_UCHE_GMEM_RANGE_MIN_LO 0xE8B
#define A5XX_UCHE_GMEM_RANGE_MIN_HI 0xE8C
#define A5XX_UCHE_GMEM_RANGE_MAX_LO 0xE8D
#define A5XX_UCHE_GMEM_RANGE_MAX_HI 0xE8E
#define A5XX_UCHE_DBG_ECO_CNTL_2 0xE8F
#define A5XX_UCHE_INVALIDATE0 0xE95
#define A5XX_UCHE_CACHE_WAYS 0xE96
#define A5XX_UCHE_PERFCTR_UCHE_SEL_0 0xEA0
#define A5XX_UCHE_PERFCTR_UCHE_SEL_1 0xEA1
#define A5XX_UCHE_PERFCTR_UCHE_SEL_2 0xEA2
#define A5XX_UCHE_PERFCTR_UCHE_SEL_3 0xEA3
#define A5XX_UCHE_PERFCTR_UCHE_SEL_4 0xEA4
#define A5XX_UCHE_PERFCTR_UCHE_SEL_5 0xEA5
#define A5XX_UCHE_PERFCTR_UCHE_SEL_6 0xEA6
#define A5XX_UCHE_PERFCTR_UCHE_SEL_7 0xEA7
/* UCHE Power Counter UCHE Registers Select */
#define A5XX_UCHE_POWERCTR_UCHE_SEL_0 0xEA8
#define A5XX_UCHE_POWERCTR_UCHE_SEL_1 0xEA9
#define A5XX_UCHE_POWERCTR_UCHE_SEL_2 0xEAA
#define A5XX_UCHE_POWERCTR_UCHE_SEL_3 0xEAB
/* SP registers */
#define A5XX_SP_DBG_ECO_CNTL 0xEC0
#define A5XX_SP_ADDR_MODE_CNTL 0xEC1
#define A5XX_SP_PERFCTR_SP_SEL_0 0xED0
#define A5XX_SP_PERFCTR_SP_SEL_1 0xED1
#define A5XX_SP_PERFCTR_SP_SEL_2 0xED2
#define A5XX_SP_PERFCTR_SP_SEL_3 0xED3
#define A5XX_SP_PERFCTR_SP_SEL_4 0xED4
#define A5XX_SP_PERFCTR_SP_SEL_5 0xED5
#define A5XX_SP_PERFCTR_SP_SEL_6 0xED6
#define A5XX_SP_PERFCTR_SP_SEL_7 0xED7
#define A5XX_SP_PERFCTR_SP_SEL_8 0xED8
#define A5XX_SP_PERFCTR_SP_SEL_9 0xED9
#define A5XX_SP_PERFCTR_SP_SEL_10 0xEDA
#define A5XX_SP_PERFCTR_SP_SEL_11 0xEDB
/* SP Power Counter SP Registers Select */
#define A5XX_SP_POWERCTR_SP_SEL_0 0xEDC
#define A5XX_SP_POWERCTR_SP_SEL_1 0xEDD
#define A5XX_SP_POWERCTR_SP_SEL_2 0xEDE
#define A5XX_SP_POWERCTR_SP_SEL_3 0xEDF
/* TP registers */
#define A5XX_TPL1_ADDR_MODE_CNTL 0xF01
#define A5XX_TPL1_MODE_CNTL 0xF02
#define A5XX_TPL1_PERFCTR_TP_SEL_0 0xF10
#define A5XX_TPL1_PERFCTR_TP_SEL_1 0xF11
#define A5XX_TPL1_PERFCTR_TP_SEL_2 0xF12
#define A5XX_TPL1_PERFCTR_TP_SEL_3 0xF13
#define A5XX_TPL1_PERFCTR_TP_SEL_4 0xF14
#define A5XX_TPL1_PERFCTR_TP_SEL_5 0xF15
#define A5XX_TPL1_PERFCTR_TP_SEL_6 0xF16
#define A5XX_TPL1_PERFCTR_TP_SEL_7 0xF17
/* TP Power Counter TP Registers Select */
#define A5XX_TPL1_POWERCTR_TP_SEL_0 0xF18
#define A5XX_TPL1_POWERCTR_TP_SEL_1 0xF19
#define A5XX_TPL1_POWERCTR_TP_SEL_2 0xF1A
#define A5XX_TPL1_POWERCTR_TP_SEL_3 0xF1B
/* VBIF registers */
#define A5XX_VBIF_VERSION 0x3000
#define A5XX_VBIF_CLKON 0x3001
#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK 0x1
#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT 0x1
#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049
#define A5XX_VBIF_GATE_OFF_WRREQ_EN 0x302A
#define A5XX_VBIF_XIN_HALT_CTRL0 0x3080
#define A5XX_VBIF_XIN_HALT_CTRL0_MASK 0xF
#define A510_VBIF_XIN_HALT_CTRL0_MASK 0x7
#define A5XX_VBIF_XIN_HALT_CTRL1 0x3081
#define A5XX_VBIF_TEST_BUS_OUT_CTRL 0x3084
#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK 0x1
#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT 0x0
#define A5XX_VBIF_TEST_BUS1_CTRL0 0x3085
#define A5XX_VBIF_TEST_BUS1_CTRL1 0x3086
#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK 0xF
#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0
#define A5XX_VBIF_TEST_BUS2_CTRL0 0x3087
#define A5XX_VBIF_TEST_BUS2_CTRL1 0x3088
#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK 0x1FF
#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT 0x0
#define A5XX_VBIF_TEST_BUS_OUT 0x308c
#define A5XX_VBIF_PERF_CNT_SEL0 0x30D0
#define A5XX_VBIF_PERF_CNT_SEL1 0x30D1
#define A5XX_VBIF_PERF_CNT_SEL2 0x30D2
#define A5XX_VBIF_PERF_CNT_SEL3 0x30D3
#define A5XX_VBIF_PERF_CNT_LOW0 0x30D8
#define A5XX_VBIF_PERF_CNT_LOW1 0x30D9
#define A5XX_VBIF_PERF_CNT_LOW2 0x30DA
#define A5XX_VBIF_PERF_CNT_LOW3 0x30DB
#define A5XX_VBIF_PERF_CNT_HIGH0 0x30E0
#define A5XX_VBIF_PERF_CNT_HIGH1 0x30E1
#define A5XX_VBIF_PERF_CNT_HIGH2 0x30E2
#define A5XX_VBIF_PERF_CNT_HIGH3 0x30E3
#define A5XX_VBIF_PERF_PWR_CNT_EN0 0x3100
#define A5XX_VBIF_PERF_PWR_CNT_EN1 0x3101
#define A5XX_VBIF_PERF_PWR_CNT_EN2 0x3102
#define A5XX_VBIF_PERF_PWR_CNT_LOW0 0x3110
#define A5XX_VBIF_PERF_PWR_CNT_LOW1 0x3111
#define A5XX_VBIF_PERF_PWR_CNT_LOW2 0x3112
#define A5XX_VBIF_PERF_PWR_CNT_HIGH0 0x3118
#define A5XX_VBIF_PERF_PWR_CNT_HIGH1 0x3119
#define A5XX_VBIF_PERF_PWR_CNT_HIGH2 0x311A
/* GPMU registers */
#define A5XX_GPMU_INST_RAM_BASE 0x8800
#define A5XX_GPMU_DATA_RAM_BASE 0x9800
#define A5XX_GPMU_SP_POWER_CNTL 0xA881
#define A5XX_GPMU_RBCCU_CLOCK_CNTL 0xA886
#define A5XX_GPMU_RBCCU_POWER_CNTL 0xA887
#define A5XX_GPMU_SP_PWR_CLK_STATUS 0xA88B
#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS 0xA88D
#define A5XX_GPMU_PWR_COL_STAGGER_DELAY 0xA891
#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892
#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893
#define A5XX_GPMU_PWR_COL_BINNING_CTRL 0xA894
#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3
#define A5XX_GPMU_WFI_CONFIG 0xA8C1
#define A5XX_GPMU_RBBM_INTR_INFO 0xA8D6
#define A5XX_GPMU_CM3_SYSRESET 0xA8D8
#define A5XX_GPMU_GENERAL_0 0xA8E0
#define A5XX_GPMU_GENERAL_1 0xA8E1
/* COUNTABLE FOR SP PERFCOUNTER */
#define A5XX_SP_ALU_ACTIVE_CYCLES 0x1
#define A5XX_SP0_ICL1_MISSES 0x35
#define A5XX_SP_FS_CFLOW_INSTRUCTIONS 0x27
/* COUNTABLE FOR TSE PERFCOUNTER */
#define A5XX_TSE_INPUT_PRIM_NUM 0x6
/* COUNTABLE FOR RBBM PERFCOUNTER */
#define A5XX_RBBM_ALWAYS_COUNT 0x0
/* GPMU POWER COUNTERS */
#define A5XX_SP_POWER_COUNTER_0_LO 0xA840
#define A5XX_SP_POWER_COUNTER_0_HI 0xA841
#define A5XX_SP_POWER_COUNTER_1_LO 0xA842
#define A5XX_SP_POWER_COUNTER_1_HI 0xA843
#define A5XX_SP_POWER_COUNTER_2_LO 0xA844
#define A5XX_SP_POWER_COUNTER_2_HI 0xA845
#define A5XX_SP_POWER_COUNTER_3_LO 0xA846
#define A5XX_SP_POWER_COUNTER_3_HI 0xA847
#define A5XX_TP_POWER_COUNTER_0_LO 0xA848
#define A5XX_TP_POWER_COUNTER_0_HI 0xA849
#define A5XX_TP_POWER_COUNTER_1_LO 0xA84A
#define A5XX_TP_POWER_COUNTER_1_HI 0xA84B
#define A5XX_TP_POWER_COUNTER_2_LO 0xA84C
#define A5XX_TP_POWER_COUNTER_2_HI 0xA84D
#define A5XX_TP_POWER_COUNTER_3_LO 0xA84E
#define A5XX_TP_POWER_COUNTER_3_HI 0xA84F
#define A5XX_RB_POWER_COUNTER_0_LO 0xA850
#define A5XX_RB_POWER_COUNTER_0_HI 0xA851
#define A5XX_RB_POWER_COUNTER_1_LO 0xA852
#define A5XX_RB_POWER_COUNTER_1_HI 0xA853
#define A5XX_RB_POWER_COUNTER_2_LO 0xA854
#define A5XX_RB_POWER_COUNTER_2_HI 0xA855
#define A5XX_RB_POWER_COUNTER_3_LO 0xA856
#define A5XX_RB_POWER_COUNTER_3_HI 0xA857
#define A5XX_CCU_POWER_COUNTER_0_LO 0xA858
#define A5XX_CCU_POWER_COUNTER_0_HI 0xA859
#define A5XX_CCU_POWER_COUNTER_1_LO 0xA85A
#define A5XX_CCU_POWER_COUNTER_1_HI 0xA85B
#define A5XX_UCHE_POWER_COUNTER_0_LO 0xA85C
#define A5XX_UCHE_POWER_COUNTER_0_HI 0xA85D
#define A5XX_UCHE_POWER_COUNTER_1_LO 0xA85E
#define A5XX_UCHE_POWER_COUNTER_1_HI 0xA85F
#define A5XX_UCHE_POWER_COUNTER_2_LO 0xA860
#define A5XX_UCHE_POWER_COUNTER_2_HI 0xA861
#define A5XX_UCHE_POWER_COUNTER_3_LO 0xA862
#define A5XX_UCHE_POWER_COUNTER_3_HI 0xA863
#define A5XX_CP_POWER_COUNTER_0_LO 0xA864
#define A5XX_CP_POWER_COUNTER_0_HI 0xA865
#define A5XX_CP_POWER_COUNTER_1_LO 0xA866
#define A5XX_CP_POWER_COUNTER_1_HI 0xA867
#define A5XX_CP_POWER_COUNTER_2_LO 0xA868
#define A5XX_CP_POWER_COUNTER_2_HI 0xA869
#define A5XX_CP_POWER_COUNTER_3_LO 0xA86A
#define A5XX_CP_POWER_COUNTER_3_HI 0xA86B
#define A5XX_GPMU_POWER_COUNTER_0_LO 0xA86C
#define A5XX_GPMU_POWER_COUNTER_0_HI 0xA86D
#define A5XX_GPMU_POWER_COUNTER_1_LO 0xA86E
#define A5XX_GPMU_POWER_COUNTER_1_HI 0xA86F
#define A5XX_GPMU_POWER_COUNTER_2_LO 0xA870
#define A5XX_GPMU_POWER_COUNTER_2_HI 0xA871
#define A5XX_GPMU_POWER_COUNTER_3_LO 0xA872
#define A5XX_GPMU_POWER_COUNTER_3_HI 0xA873
#define A5XX_GPMU_POWER_COUNTER_4_LO 0xA874
#define A5XX_GPMU_POWER_COUNTER_4_HI 0xA875
#define A5XX_GPMU_POWER_COUNTER_5_LO 0xA876
#define A5XX_GPMU_POWER_COUNTER_5_HI 0xA877
#define A5XX_GPMU_POWER_COUNTER_ENABLE 0xA878
#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO 0xA879
#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI 0xA87A
#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET 0xA87B
#define A5XX_GPMU_POWER_COUNTER_SELECT_0 0xA87C
#define A5XX_GPMU_POWER_COUNTER_SELECT_1 0xA87D
#define A5XX_GPMU_GPMU_SP_CLOCK_CONTROL 0xA880
#define A5XX_GPMU_CLOCK_THROTTLE_CTRL 0xA8A3
#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL 0xA8A8
#define A5XX_GPMU_TEMP_SENSOR_ID 0xAC00
#define A5XX_GPMU_TEMP_SENSOR_CONFIG 0xAC01
#define A5XX_GPMU_DELTA_TEMP_THRESHOLD 0xAC03
#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK 0xAC06
#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1 0xAC40
#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3 0xAC41
#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1 0xAC42
#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3 0xAC43
#define A5XX_GPMU_BASE_LEAKAGE 0xAC46
#define A5XX_GPMU_GPMU_VOLTAGE 0xAC60
#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS 0xAC61
#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK 0xAC62
#define A5XX_GPMU_GPMU_PWR_THRESHOLD 0xAC80
#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL 0xACC4
#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS 0xACC5
#define A5XX_GPMU_GPMU_ISENSE_CTRL 0xACD0
#define A5XX_GDPM_CONFIG1 0xB80C
#define A5XX_GDPM_INT_EN 0xB80F
#define A5XX_GDPM_INT_MASK 0xB811
#define A5XX_GPMU_BEC_ENABLE 0xB9A0
/* ISENSE registers */
#define A5XX_GPU_CS_DECIMAL_ALIGN 0xC16A
#define A5XX_GPU_CS_SENSOR_PARAM_CORE_1 0xC126
#define A5XX_GPU_CS_SENSOR_PARAM_CORE_2 0xC127
#define A5XX_GPU_CS_SW_OV_FUSE_EN 0xC168
#define A5XX_GPU_CS_SENSOR_GENERAL_STATUS 0xC41A
#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0 0xC41D
#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2 0xC41F
#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4 0xC421
#define A5XX_GPU_CS_ENABLE_REG 0xC520
#define A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 0xC557
#define A5XX_GPU_CS_AMP_CALIBRATION_DONE 0xC565
#define A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE 0xC556
#endif /* _A5XX_REG_H */

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2013-2016, 2019-2020, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __A3XX_H
#define __A3XX_H
#include "a3xx_reg.h"
/**
* struct adreno_a3xx_core - a3xx specific GPU core definitions
*/
struct adreno_a3xx_core {
/** @base: Container for the generic &struct adreno_gpu_core */
struct adreno_gpu_core base;
/** @pm4fw_name: Name of the PM4 microcode file */
const char *pm4fw_name;
/** @pfpfw_name: Name of the PFP microcode file */
const char *pfpfw_name;
/** @vbif: List of registers and values to write for VBIF */
const struct kgsl_regmap_list *vbif;
/** @vbif_count: Number of registers in @vbif */
u32 vbif_count;
};
struct adreno_device;
/**
* to_a3xx_core - return the a3xx specific GPU core struct
* @adreno_dev: An Adreno GPU device handle
*
* Returns:
* A pointer to the a3xx specific GPU core struct
*/
static inline const struct adreno_a3xx_core *
to_a3xx_core(struct adreno_device *adreno_dev)
{
const struct adreno_gpu_core *core = adreno_dev->gpucore;
return container_of(core, struct adreno_a3xx_core, base);
}
void a3xx_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
extern const struct adreno_perfcounters adreno_a3xx_perfcounters;
/**
* a3xx_ringbuffer_init - Initialize the ringbuffer
* @adreno_dev: An Adreno GPU handle
*
* Initialize the ringbuffer for a3xx.
* Return: 0 on success or negative on failure
*/
int a3xx_ringbuffer_init(struct adreno_device *adreno_dev);
/**
* a3xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @cmdobj: Pointer to a user command object
* @flags: Internal submit flags
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time);
#ifdef CONFIG_QCOM_KGSL_CORESIGHT
void a3xx_coresight_init(struct adreno_device *device);
#else
static inline void a3xx_coresight_init(struct adreno_device *device) { }
#endif
#endif /*__A3XX_H */

View File

@ -0,0 +1,65 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a3xx.h"
#include "adreno_coresight.h"
static struct adreno_coresight_register a3xx_coresight_registers[] = {
{ A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F },
{ A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff },
{ A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f },
{ A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff },
{ A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 },
{ A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 },
{ A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 },
{ A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE },
{ A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 },
};
static ADRENO_CORESIGHT_ATTR(config_debug_bus,
&a3xx_coresight_registers[0]);
static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt,
&a3xx_coresight_registers[1]);
static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt,
&a3xx_coresight_registers[2]);
static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt,
&a3xx_coresight_registers[3]);
static ADRENO_CORESIGHT_ATTR(config_trace_cmd,
&a3xx_coresight_registers[4]);
static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl,
&a3xx_coresight_registers[5]);
static struct attribute *a3xx_coresight_attrs[] = {
&coresight_attr_config_debug_bus.attr.attr,
&coresight_attr_config_trace_start_cnt.attr.attr,
&coresight_attr_config_trace_stop_cnt.attr.attr,
&coresight_attr_config_trace_period_cnt.attr.attr,
&coresight_attr_config_trace_cmd.attr.attr,
&coresight_attr_config_trace_bus_ctl.attr.attr,
NULL,
};
static const struct attribute_group a3xx_coresight_group = {
.attrs = a3xx_coresight_attrs,
};
static const struct attribute_group *a3xx_coresight_groups[] = {
&a3xx_coresight_group,
NULL,
};
static const struct adreno_coresight a3xx_coresight = {
.registers = a3xx_coresight_registers,
.count = ARRAY_SIZE(a3xx_coresight_registers),
.groups = a3xx_coresight_groups,
};
void a3xx_coresight_init(struct adreno_device *adreno_dev)
{
adreno_coresight_add_device(adreno_dev, "coresight-gfx",
&a3xx_coresight, &adreno_dev->gx_coresight);
}

View File

@ -0,0 +1,411 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020, The Linux Foundation. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a3xx.h"
#include "adreno_perfcounter.h"
#include "kgsl_device.h"
/* Bit flag for RBBM_PERFCTR_CTL */
#define RBBM_PERFCTR_CTL_ENABLE 0x00000001
#define VBIF2_PERF_CNT_SEL_MASK 0x7F
/* offset of clear register from select register */
#define VBIF2_PERF_CLR_REG_SEL_OFF 8
/* offset of enable register from select register */
#define VBIF2_PERF_EN_REG_SEL_OFF 16
/* offset of clear register from the enable register */
#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8
static void a3xx_counter_load(struct adreno_device *adreno_dev,
struct adreno_perfcount_register *reg)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int index = reg->load_bit / 32;
u32 enable = BIT(reg->load_bit & 31);
kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_LO,
lower_32_bits(reg->value));
kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_VALUE_HI,
upper_32_bits(reg->value));
if (index == 0)
kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, enable);
else
kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, enable);
}
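/* Program the countable into the select register and clear the saved value */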
static int a3xx_counter_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
kgsl_regwrite(device, reg->select, countable);
reg->value = 0;
return 0;
}
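/*
* Briefly disable the RBBM counters while reading the LO/HI pair so the
* 64-bit value is read consistently, then restore the previous control value
*/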
static u64 a3xx_counter_read(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
u32 val, hi, lo;
kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &val);
kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL,
val & ~RBBM_PERFCTR_CTL_ENABLE);
kgsl_regread(device, reg->offset, &lo);
kgsl_regread(device, reg->offset_hi, &hi);
kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
return (((u64) hi) << 32) | lo;
}
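/* The PWR counters are fixed-function; there is nothing to program on enable */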
static int a3xx_counter_pwr_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
return 0;
}
static u64 a3xx_counter_pwr_read(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
u32 val, hi, lo;
kgsl_regread(device, A3XX_RBBM_RBBM_CTL, &val);
/* Freeze the counter so we can read it */
if (!counter)
kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x10000);
else
kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val & ~0x20000);
kgsl_regread(device, reg->offset, &lo);
kgsl_regread(device, reg->offset_hi, &hi);
kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, val);
return ((((u64) hi) << 32) | lo) + reg->value;
}
static int a3xx_counter_vbif_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
if (countable > VBIF2_PERF_CNT_SEL_MASK)
return -EINVAL;
/*
* Write 1, followed by 0 to CLR register for
* clearing the counter
*/
kgsl_regwrite(device,
reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1);
kgsl_regwrite(device,
reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0);
kgsl_regwrite(device,
reg->select, countable & VBIF2_PERF_CNT_SEL_MASK);
/* The enable register is VBIF2_PERF_EN_REG_SEL_OFF dwords before the select register */
kgsl_regwrite(device,
reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
kgsl_regwrite(device, reg->select, countable);
reg->value = 0;
return 0;
}
static u64 a3xx_counter_vbif_read(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
u32 hi, lo;
/* freeze counter */
kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 0);
kgsl_regread(device, reg->offset, &lo);
kgsl_regread(device, reg->offset_hi, &hi);
/* un-freeze counter */
kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
return ((((u64) hi) << 32) | lo) + reg->value;
}
static int a3xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
/*
* Write 1, followed by 0 to CLR register for
* clearing the counter
*/
kgsl_regwrite(device, reg->select +
VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1);
kgsl_regwrite(device, reg->select +
VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0);
kgsl_regwrite(device, reg->select, 1);
reg->value = 0;
return 0;
}
static u64 a3xx_counter_vbif_pwr_read(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
u32 hi, lo;
/* freeze counter */
kgsl_regwrite(device, reg->select, 0);
kgsl_regread(device, reg->offset, &lo);
kgsl_regread(device, reg->offset_hi, &hi);
/* un-freeze counter */
kgsl_regwrite(device, reg->select, 1);
return ((((u64) hi) << 32) | lo) + reg->value;
}
/*
* Define the available perfcounter groups - these get used by
* adreno_perfcounter_get and adreno_perfcounter_put
*/
static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO,
A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO,
A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO,
A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO,
A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO,
A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO,
A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO,
A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO,
A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO,
A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO,
A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9,
A3XX_HLSQ_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO,
A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10,
A3XX_HLSQ_PERFCOUNTER1_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO,
A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11,
A3XX_HLSQ_PERFCOUNTER2_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO,
A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12,
A3XX_HLSQ_PERFCOUNTER3_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO,
A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13,
A3XX_HLSQ_PERFCOUNTER4_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO,
A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14,
A3XX_HLSQ_PERFCOUNTER5_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO,
A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO,
A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO,
A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO,
A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO,
A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO,
A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO,
A3XX_RBBM_PERFCTR_UCHE_0_HI, 21,
A3XX_UCHE_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO,
A3XX_RBBM_PERFCTR_UCHE_1_HI, 22,
A3XX_UCHE_PERFCOUNTER1_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO,
A3XX_RBBM_PERFCTR_UCHE_2_HI, 23,
A3XX_UCHE_PERFCOUNTER2_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO,
A3XX_RBBM_PERFCTR_UCHE_3_HI, 24,
A3XX_UCHE_PERFCOUNTER3_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO,
A3XX_RBBM_PERFCTR_UCHE_4_HI, 25,
A3XX_UCHE_PERFCOUNTER4_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO,
A3XX_RBBM_PERFCTR_UCHE_5_HI, 26,
A3XX_UCHE_PERFCOUNTER5_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO,
A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO,
A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO,
A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO,
A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO,
A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO,
A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO,
A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO,
A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO,
A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO,
A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO,
A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO,
A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO,
A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO,
A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO,
A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO,
A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT },
};
static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO,
A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 },
/*
* A3XX_RBBM_PERFCTR_PWR_1_LO is used for frequency scaling and removed
* from the pool of available counters
*/
};
static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0,
A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1,
A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2,
A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3,
A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 },
};
/*
* Place the EN register in the select field since the VBIF power
* counters do not have a select register to program
*/
static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0,
0, A3XX_VBIF2_PERF_PWR_CNT_LOW0,
A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1,
A3XX_VBIF2_PERF_PWR_CNT_EN0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0,
0, A3XX_VBIF2_PERF_PWR_CNT_LOW1,
A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1,
A3XX_VBIF2_PERF_PWR_CNT_EN1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0,
0, A3XX_VBIF2_PERF_PWR_CNT_LOW2,
A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1,
A3XX_VBIF2_PERF_PWR_CNT_EN2 },
};
#define A3XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \
ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name, enable, read, load)
#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \
ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags, enable, read, load)
#define A3XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \
A3XX_PERFCOUNTER_GROUP(offset, name, a3xx_counter_enable,\
a3xx_counter_read, a3xx_counter_load)
static const struct adreno_perfcount_group
a3xx_perfcounter_groups[KGSL_PERFCOUNTER_GROUP_MAX] = {
A3XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
A3XX_REGULAR_PERFCOUNTER_GROUP(RBBM, rbbm),
A3XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
A3XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
A3XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq),
A3XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
A3XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
A3XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
A3XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
A3XX_REGULAR_PERFCOUNTER_GROUP(TP, tp),
A3XX_REGULAR_PERFCOUNTER_GROUP(SP, sp),
A3XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
ADRENO_PERFCOUNTER_GROUP_FIXED,
a3xx_counter_pwr_enable, a3xx_counter_pwr_read, NULL),
A3XX_PERFCOUNTER_GROUP(VBIF, vbif2,
a3xx_counter_vbif_enable, a3xx_counter_vbif_read, NULL),
A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif2_pwr,
ADRENO_PERFCOUNTER_GROUP_FIXED,
a3xx_counter_vbif_pwr_enable, a3xx_counter_vbif_pwr_read,
NULL),
};
const struct adreno_perfcounters adreno_a3xx_perfcounters = {
a3xx_perfcounter_groups,
ARRAY_SIZE(a3xx_perfcounter_groups),
};

View File

@ -0,0 +1,458 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a3xx.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"
static int a3xx_wait_reg(unsigned int *cmds, unsigned int addr,
unsigned int val, unsigned int mask,
unsigned int interval)
{
cmds[0] = cp_type3_packet(CP_WAIT_REG_EQ, 4);
cmds[1] = addr;
cmds[2] = val;
cmds[3] = mask;
cmds[4] = interval;
return 5;
}
static int a3xx_vbif_lock(unsigned int *cmds)
{
int count;
/*
* glue commands together until next
* WAIT_FOR_ME
*/
count = a3xx_wait_reg(cmds, A3XX_CP_WFI_PEND_CTR,
1, 0xFFFFFFFF, 0xF);
/* MMU-500 VBIF stall */
cmds[count++] = cp_type3_packet(CP_REG_RMW, 3);
cmds[count++] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0;
/* AND to unmask the HALT bit */
cmds[count++] = ~(VBIF_RECOVERABLE_HALT_CTRL);
/* OR to set the HALT bit */
cmds[count++] = 0x1;
/* Wait for acknowledgment */
count += a3xx_wait_reg(&cmds[count],
A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1,
1, 0xFFFFFFFF, 0xF);
return count;
}
static int a3xx_vbif_unlock(unsigned int *cmds)
{
/* MMU-500 VBIF unstall */
cmds[0] = cp_type3_packet(CP_REG_RMW, 3);
cmds[1] = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0;
/* AND to unmask the HALT bit */
cmds[2] = ~(VBIF_RECOVERABLE_HALT_CTRL);
/* OR to reset the HALT bit */
cmds[3] = 0;
/* release all commands since _vbif_lock() with wait_for_me */
cmds[4] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
cmds[5] = 0;
return 6;
}
#define A3XX_GPU_OFFSET 0xa000
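/*
* Emit a CP_REG_WR_NO_CTXT header addressing an SMMU context register through
* the GPU aperture at A3XX_GPU_OFFSET; the caller appends @num data dwords
*/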
static int a3xx_cp_smmu_reg(unsigned int *cmds,
u32 reg,
unsigned int num)
{
cmds[0] = cp_type3_packet(CP_REG_WR_NO_CTXT, num + 1);
cmds[1] = (A3XX_GPU_OFFSET + reg) >> 2;
return 2;
}
/* This function is only needed for A3xx targets */
static int a3xx_tlbiall(unsigned int *cmds)
{
unsigned int tlbstatus = (A3XX_GPU_OFFSET +
KGSL_IOMMU_CTX_TLBSTATUS) >> 2;
int count;
count = a3xx_cp_smmu_reg(cmds, KGSL_IOMMU_CTX_TLBIALL, 1);
cmds[count++] = 1;
count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TLBSYNC, 1);
cmds[count++] = 0;
count += a3xx_wait_reg(&cmds[count], tlbstatus, 0,
KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF);
return count;
}
/* offset at which a nop command is placed in setstate */
#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024
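/*
* Emit the CP commands for an inline pagetable switch: drain the pipeline with
* a nop indirect buffer, stall the VBIF, program TTBR0 through the SMMU
* aperture, unstall, then invalidate the TLB and the CP state. Skipped when
* the context uses the default pagetable.
*/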
static int a3xx_rb_pagetable_switch(struct adreno_device *adreno_dev,
struct kgsl_pagetable *pagetable, u32 *cmds)
{
u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
int count = 0;
/* Skip pagetable switch if current context is using default PT. */
if (pagetable == device->mmu.defaultpagetable)
return 0;
/*
* Adding an indirect buffer ensures that the prefetch stalls until
* the commands in the indirect buffer have completed. We need to stall
* the prefetch with a nop indirect buffer when updating pagetables
* because it provides more stable synchronization.
*/
cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
cmds[count++] = 0;
cmds[count++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
cmds[count++] = lower_32_bits(iommu->setstate->gpuaddr);
cmds[count++] = 2;
cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
cmds[count++] = 0;
cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
cmds[count++] = 0;
count += a3xx_vbif_lock(&cmds[count]);
count += a3xx_cp_smmu_reg(&cmds[count], KGSL_IOMMU_CTX_TTBR0, 2);
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
count += a3xx_vbif_unlock(&cmds[count]);
count += a3xx_tlbiall(&cmds[count]);
/* wait for me to finish the TLBI */
cmds[count++] = cp_type3_packet(CP_WAIT_FOR_ME, 1);
cmds[count++] = 0;
cmds[count++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
cmds[count++] = 0;
/* Invalidate the state */
cmds[count++] = cp_type3_packet(CP_INVALIDATE_STATE, 1);
cmds[count++] = 0x7ffff;
return count;
}
#define RB_SOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
#define RB_EOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
int a3xx_ringbuffer_init(struct adreno_device *adreno_dev)
{
adreno_dev->num_ringbuffers = 1;
adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
return adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[0], 0);
}
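/* Worst-case number of dwords written around the payload by a3xx_ringbuffer_addcmds() */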
#define A3XX_SUBMIT_MAX 55
static int a3xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 size = A3XX_SUBMIT_MAX + dwords;
u32 *cmds, index = 0;
u64 profile_gpuaddr;
u32 profile_dwords;
if (adreno_drawctxt_detached(drawctxt))
return -ENOENT;
if (adreno_gpu_fault(adreno_dev) != 0)
return -EPROTO;
rb->timestamp++;
if (drawctxt)
drawctxt->internal_timestamp = rb->timestamp;
cmds = adreno_ringbuffer_allocspace(rb, size);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
/* Identify the start of a command */
cmds[index++] = cp_type3_packet(CP_NOP, 1);
cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
if (IS_PWRON_FIXUP(flags)) {
cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 0;
cmds[index++] = cp_type3_packet(CP_NOP, 1);
cmds[index++] = PWRON_FIXUP_IDENTIFIER;
cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
cmds[index++] = lower_32_bits(adreno_dev->pwron_fixup->gpuaddr);
cmds[index++] = adreno_dev->pwron_fixup_dwords;
cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 0;
}
profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (drawctxt) {
cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2);
cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
}
cmds[index++] = cp_type3_packet(CP_MEM_WRITE, 2);
cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
if (IS_NOTPROTECTED(flags)) {
cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 0;
}
memcpy(&cmds[index], in, dwords << 2);
index += dwords;
if (IS_NOTPROTECTED(flags)) {
cmds[index++] = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 1;
}
/*
* Flush HLSQ lazy updates to make sure there are no resources pending
* for indirect loads after the timestamp
*/
cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 0x07; /* HLSQ FLUSH */
cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
cmds[index++] = 0;
profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
/*
* If this is an internal command, just write the ringbuffer timestamp,
* otherwise, write both
*/
if (!drawctxt) {
cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3);
cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
} else {
cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3);
cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
cmds[index++] = cp_type3_packet(CP_EVENT_WRITE, 3);
cmds[index++] = CACHE_FLUSH_TS;
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
}
/* Trigger a context rollover */
cmds[index++] = cp_type3_packet(CP_SET_CONSTANT, 2);
cmds[index++] = (4 << 16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000);
cmds[index++] = 0;
if (IS_WFI(flags)) {
cmds[index++] = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
cmds[index++] = 0;
}
/* Adjust the ringbuffer write pointer for the number of dwords we actually wrote */
rb->_wptr -= (size - index);
kgsl_pwrscale_busy(device);
kgsl_regwrite(device, A3XX_CP_RB_WPTR, rb->_wptr);
rb->wptr = rb->_wptr;
return 0;
}
static int a3xx_rb_context_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_pagetable *pagetable =
adreno_drawctxt_get_pagetable(drawctxt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int count = 0;
u32 cmds[64];
if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable)
count += a3xx_rb_pagetable_switch(adreno_dev, pagetable, cmds);
cmds[count++] = cp_type3_packet(CP_NOP, 1);
cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2);
cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type3_packet(CP_MEM_WRITE, 2);
cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
cmds[count++] = 0;
cmds[count++] = 0x90000000;
return a3xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
cmds, count, 0, NULL);
}
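/*
* Switch the ringbuffer to @drawctxt, submitting context (and pagetable)
* switch commands and releasing the previously active context once the
* switch timestamp retires
*/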
static int a3xx_drawctxt_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (rb->drawctxt_active == drawctxt)
return 0;
if (kgsl_context_detached(&drawctxt->base))
return -ENOENT;
if (!_kgsl_context_get(&drawctxt->base))
return -ENOENT;
trace_adreno_drawctxt_switch(rb, drawctxt);
a3xx_rb_context_switch(adreno_dev, rb, drawctxt);
/* Release the current drawctxt as soon as the new one is switched */
adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
rb, rb->timestamp);
rb->drawctxt_active = drawctxt;
return 0;
}
#define A3XX_COMMAND_DWORDS 4
int a3xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
struct adreno_ringbuffer *rb = drawctxt->rb;
int ret = 0, numibs = 0, index = 0;
u32 *cmds;
/* Count the number of IBs (if we are not skipping) */
if (!IS_SKIP(flags)) {
struct list_head *tmp;
list_for_each(tmp, &cmdobj->cmdlist)
numibs++;
}
cmds = kmalloc((A3XX_COMMAND_DWORDS + (numibs * 4)) << 2, GFP_KERNEL);
if (!cmds) {
ret = -ENOMEM;
goto done;
}
cmds[index++] = cp_type3_packet(CP_NOP, 1);
cmds[index++] = START_IB_IDENTIFIER;
if (numibs) {
struct kgsl_memobj_node *ib;
list_for_each_entry(ib, &cmdobj->cmdlist, node) {
if (ib->priv & MEMOBJ_SKIP ||
(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
&& !IS_PREAMBLE(flags)))
cmds[index++] = cp_type3_packet(CP_NOP, 3);
cmds[index++] =
cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2);
cmds[index++] = lower_32_bits(ib->gpuaddr);
cmds[index++] = ib->size >> 2;
}
}
cmds[index++] = cp_type3_packet(CP_NOP, 1);
cmds[index++] = END_IB_IDENTIFIER;
ret = a3xx_drawctxt_switch(adreno_dev, rb, drawctxt);
/*
* In the unlikely event of an error in the drawctxt switch,
* treat it like a hang
*/
if (ret) {
/*
* It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
* the upper layers know how to handle it
*/
if (ret != -ENOSPC && ret != -ENOENT)
dev_err(device->dev,
"Unable to switch draw context: %d\n",
ret);
goto done;
}
adreno_drawobj_set_constraint(device, drawobj);
ret = a3xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
flags, cmds, index, drawobj->timestamp, NULL);
done:
trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
kfree(cmds);
return ret;
}

View File

@ -0,0 +1,449 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2017,2019-2020, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/io.h>
#include "adreno.h"
#include "adreno_a3xx.h"
#include "adreno_snapshot.h"
#include "kgsl_device.h"
/*
* Set of registers to dump for A3XX on snapshot.
* Registers in pairs - first value is the start offset, second
* is the stop offset (inclusive)
*/
static const unsigned int a3xx_registers[] = {
0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9,
0x01fc, 0x01ff,
0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5,
0x0e41, 0x0e45, 0x0e64, 0x0e65,
0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
0x2240, 0x227e,
0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
0x22ff, 0x22ff, 0x2340, 0x2343,
0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
0x25f0, 0x25f0,
0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
0x300C, 0x300E, 0x301C, 0x301D,
0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
0x303C, 0x303C, 0x305E, 0x305F,
};
/*
* These HLSQ register ranges are not included in a3xx_registers because
* reading them while the HLSQ block is busy may cause the device to hang;
* _snapshot_hlsq_regs() dumps them only after checking the debug bus.
*/
static const unsigned int a3xx_hlsq_registers[] = {
0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23,
0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a,
0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
};
/* Shader memory size in words */
#define SHADER_MEMORY_SIZE 0x4000
/**
* _rbbm_debug_bus_read - Helper function to read data from the RBBM
* debug bus.
* @device: GPU device to read/write registers
* @block_id: Debug bus block to read from
* @index: Index in the debug bus block to read
* @val: Pointer to store the value read from the debug bus
*/
static void _rbbm_debug_bus_read(struct kgsl_device *device,
unsigned int block_id, unsigned int index, unsigned int *val)
{
unsigned int block = (block_id << 8) | 1 << 16;
kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index);
kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val);
}
/**
* a3xx_snapshot_shader_memory - Helper function to dump the GPU shader
* memory to the snapshot buffer.
* @device: GPU device whose shader memory is to be dumped
* @buf: Pointer to binary snapshot data blob being made
* @remain: Number of remaining bytes in the snapshot blob
* @priv: Unused parameter
*
*/
static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
void *data = buf + sizeof(*header);
unsigned int shader_read_len = SHADER_MEMORY_SIZE;
if (remain < DEBUG_SECTION_SZ(shader_read_len)) {
SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
return 0;
}
header->type = SNAPSHOT_DEBUG_SHADER_MEMORY;
header->size = shader_read_len;
/* Map shader memory to kernel, for dumping */
if (IS_ERR_OR_NULL(device->shader_mem_virt)) {
struct resource *res;
res = platform_get_resource_byname(device->pdev,
IORESOURCE_MEM, "kgsl_3d0_shader_memory");
if (res)
device->shader_mem_virt =
devm_ioremap_resource(&device->pdev->dev, res);
}
if (IS_ERR_OR_NULL(device->shader_mem_virt)) {
dev_err(device->dev, "Unable to map the shader memory\n");
return 0;
}
memcpy_fromio(data, device->shader_mem_virt, shader_read_len << 2);
return DEBUG_SECTION_SZ(shader_read_len);
}
static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_debugbus *header
= (struct kgsl_snapshot_debugbus *)buf;
struct adreno_debugbus_block *block = priv;
int i;
unsigned int *data = (unsigned int *)(buf + sizeof(*header));
size_t size;
size = (0x40 * sizeof(unsigned int)) + sizeof(*header);
if (remain < size) {
SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
return 0;
}
header->id = block->block_id;
header->count = 0x40;
for (i = 0; i < 0x40; i++)
_rbbm_debug_bus_read(device, block->block_id, i, &data[i]);
return size;
}
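/* RBBM debug bus blocks captured in the snapshot */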
static struct adreno_debugbus_block debugbus_blocks[] = {
{ RBBM_BLOCK_ID_CP, 0x52, },
{ RBBM_BLOCK_ID_RBBM, 0x40, },
{ RBBM_BLOCK_ID_VBIF, 0x40, },
{ RBBM_BLOCK_ID_HLSQ, 0x40, },
{ RBBM_BLOCK_ID_UCHE, 0x40, },
{ RBBM_BLOCK_ID_PC, 0x40, },
{ RBBM_BLOCK_ID_VFD, 0x40, },
{ RBBM_BLOCK_ID_VPC, 0x40, },
{ RBBM_BLOCK_ID_TSE, 0x40, },
{ RBBM_BLOCK_ID_RAS, 0x40, },
{ RBBM_BLOCK_ID_VSC, 0x40, },
{ RBBM_BLOCK_ID_SP_0, 0x40, },
{ RBBM_BLOCK_ID_SP_1, 0x40, },
{ RBBM_BLOCK_ID_SP_2, 0x40, },
{ RBBM_BLOCK_ID_SP_3, 0x40, },
{ RBBM_BLOCK_ID_TPL1_0, 0x40, },
{ RBBM_BLOCK_ID_TPL1_1, 0x40, },
{ RBBM_BLOCK_ID_TPL1_2, 0x40, },
{ RBBM_BLOCK_ID_TPL1_3, 0x40, },
{ RBBM_BLOCK_ID_RB_0, 0x40, },
{ RBBM_BLOCK_ID_RB_1, 0x40, },
{ RBBM_BLOCK_ID_RB_2, 0x40, },
{ RBBM_BLOCK_ID_RB_3, 0x40, },
{ RBBM_BLOCK_ID_MARB_0, 0x40, },
{ RBBM_BLOCK_ID_MARB_1, 0x40, },
{ RBBM_BLOCK_ID_MARB_2, 0x40, },
{ RBBM_BLOCK_ID_MARB_3, 0x40, },
};
static void a3xx_snapshot_debugbus(struct kgsl_device *device,
struct kgsl_snapshot *snapshot)
{
int i;
for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) {
kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot,
a3xx_snapshot_debugbus_block,
(void *) &debugbus_blocks[i]);
}
}
static void _snapshot_hlsq_regs(struct kgsl_device *device,
struct kgsl_snapshot *snapshot)
{
unsigned int next_pif = 0;
/*
* Trying to read HLSQ registers when the HLSQ block is busy
* will cause the device to hang. The RBBM_DEBUG_BUS has information
* that will tell us if the HLSQ block is busy or not. Read values
* from the debug bus to ensure the HLSQ block is not busy (this
* is hardware dependent). If the HLSQ block is busy do not
* dump the registers, otherwise dump the HLSQ registers.
*/
/*
* tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0]
* spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0]
*
* if ((tpif == 0, 1, 28) && (spif == 0, 1, 10))
* then dump HLSQ registers
*/
/* check tpif */
_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif);
next_pif &= 0x1f;
if (next_pif != 0 && next_pif != 1 && next_pif != 28)
return;
/* check spif */
_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif);
next_pif &= 0x3f;
if (next_pif != 0 && next_pif != 1 && next_pif != 10)
return;
SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers);
}
#define VPC_MEM_SIZE 512
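/*
* Dump the VPC debug RAM: four banks of VPC_MEM_SIZE dwords each, selected
* through VPC_DEBUG_RAM_SEL and read back through VPC_DEBUG_RAM_READ
*/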
static size_t a3xx_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
unsigned int *data = (unsigned int *)(buf + sizeof(*header));
size_t size = 4 * VPC_MEM_SIZE;
int bank, addr, i = 0;
if (remain < DEBUG_SECTION_SZ(size)) {
SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY");
return 0;
}
header->type = SNAPSHOT_DEBUG_VPC_MEMORY;
header->size = size;
for (bank = 0; bank < 4; bank++) {
for (addr = 0; addr < VPC_MEM_SIZE; addr++) {
unsigned int val = bank | (addr << 4);
kgsl_regwrite(device, A3XX_VPC_VPC_DEBUG_RAM_SEL, val);
kgsl_regread(device, A3XX_VPC_VPC_DEBUG_RAM_READ,
&data[i++]);
}
}
return DEBUG_SECTION_SZ(size);
}
static size_t a3xx_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
unsigned int *data = (unsigned int *)(buf + sizeof(*header));
struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PM4);
size_t size = fw->size - 1;
if (remain < DEBUG_SECTION_SZ(size)) {
SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
return 0;
}
header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
header->size = size;
/*
* Read the firmware from the GPU rather than use our cache in order to
* try to catch mis-programming or corruption in the hardware. We do
* use the cached version of the size, however, instead of trying to
* maintain always changing hardcoded constants
*/
kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ME_RAM_RADDR,
A3XX_CP_ME_RAM_DATA, data, size);
return DEBUG_SECTION_SZ(size);
}
static size_t a3xx_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
unsigned int *data = (unsigned int *)(buf + sizeof(*header));
struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_PFP);
int size = fw->size - 1;
if (remain < DEBUG_SECTION_SZ(size)) {
SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
return 0;
}
header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
header->size = size;
/*
* Read the firmware from the GPU rather than use our cache in order to
* try to catch mis-programming or corruption in the hardware. We do
* use the cached version of the size, however, instead of trying to
* maintain always changing hardcoded constants
*/
kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_PFP_UCODE_ADDR,
A3XX_CP_PFP_UCODE_DATA, data, size);
return DEBUG_SECTION_SZ(size);
}
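/* Dump the 128-dword CP ROQ through its indexed address/data register pair */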
static size_t a3xx_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
u32 *data = (u32 *) (buf + sizeof(*header));
if (remain < DEBUG_SECTION_SZ(128)) {
SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
return 0;
}
header->type = SNAPSHOT_DEBUG_CP_ROQ;
header->size = 128;
kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_ROQ_ADDR,
A3XX_CP_ROQ_DATA, data, 128);
return DEBUG_SECTION_SZ(128);
}
static size_t a3xx_snapshot_cp_meq(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *) buf;
u32 *data = (u32 *) (buf + sizeof(*header));
if (remain < DEBUG_SECTION_SZ(16)) {
SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG");
return 0;
}
header->type = SNAPSHOT_DEBUG_CP_MEQ;
header->size = 16;
kgsl_regmap_read_indexed(&device->regmap, A3XX_CP_MEQ_ADDR,
A3XX_CP_MEQ_DATA, data, 16);
return DEBUG_SECTION_SZ(16);
}
/*
* a3xx_snapshot() - A3XX GPU snapshot function
* @adreno_dev: Device being snapshotted
* @snapshot: Snapshot metadata
*
* This is where all of the A3XX specific bits and pieces are grabbed
* into the snapshot memory
*/
void a3xx_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int reg;
/* Disable Clock gating temporarily for the debug bus to work */
kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL, 0x0);
/* Save some CP information that the generic snapshot uses */
kgsl_regread(device, A3XX_CP_IB1_BASE, &reg);
snapshot->ib1base = (u64) reg;
kgsl_regread(device, A3XX_CP_IB2_BASE, &reg);
snapshot->ib2base = (u64) reg;
kgsl_regread(device, A3XX_CP_IB1_BUFSZ, &snapshot->ib1size);
kgsl_regread(device, A3XX_CP_IB2_BUFSZ, &snapshot->ib2size);
SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers);
_snapshot_hlsq_regs(device, snapshot);
kgsl_snapshot_indexed_registers(device, snapshot,
A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA, 0, 0x14);
/* CP_ME indexed registers */
kgsl_snapshot_indexed_registers(device, snapshot,
A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44);
/* VPC memory */
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, a3xx_snapshot_vpc_memory, NULL);
/* CP MEQ */
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot,
a3xx_snapshot_cp_meq, NULL);
/* Shader working/shadow memory */
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, a3xx_snapshot_shader_memory, NULL);
/* CP PFP and PM4 */
/*
* Reading the microcode while the CP is running will
* basically move the CP instruction pointer to
* whatever address we read. Big badaboom ensues. Stop the CP
* (if it isn't already stopped) to ensure that we are safe.
* We do this here and not earlier to avoid corrupting the RBBM
* status and CP registers - by the time we get here we don't
* care about the contents of the CP anymore.
*/
kgsl_regread(device, A3XX_CP_ME_CNTL, &reg);
reg |= (1 << 27) | (1 << 28);
kgsl_regwrite(device, A3XX_CP_ME_CNTL, reg);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, a3xx_snapshot_cp_pfp_ram, NULL);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, a3xx_snapshot_cp_pm4_ram, NULL);
/* CP ROQ */
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, a3xx_snapshot_cp_roq, NULL);
a3xx_snapshot_debugbus(device, snapshot);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,314 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2015-2017,2019-2020 The Linux Foundation. All rights reserved.
* Copyright (c) 2022,2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_A5XX_H_
#define _ADRENO_A5XX_H_
#include "a5xx_reg.h"
/**
* struct adreno_a5xx_core - a5xx specific GPU core definitions
*/
struct adreno_a5xx_core {
/** @base: Container for the generic &struct adreno_gpu_core */
struct adreno_gpu_core base;
/** @gpmu_tsens: ID for the temperature sensor used by the GPMU */
unsigned int gpmu_tsens;
/** @max_power: Max possible power draw of a core */
unsigned int max_power;
/** @pm4fw_name: Name of the PM4 microcode file */
const char *pm4fw_name;
/** @pfpfw_name: Name of the PFP microcode file */
const char *pfpfw_name;
/** @gpmufw_name: Name of the GPMU microcode file */
const char *gpmufw_name;
/** @regfw_name: Filename for the LM registers if applicable */
const char *regfw_name;
/** @zap_name: Name of the CPZ zap file */
const char *zap_name;
/** @hwcg: List of registers and values to write for HWCG */
const struct kgsl_regmap_list *hwcg;
/** @hwcg_count: Number of registers in @hwcg */
u32 hwcg_count;
/** @vbif: List of registers and values to write for VBIF */
const struct kgsl_regmap_list *vbif;
/** @vbif_count: Number of registers in @vbif */
u32 vbif_count;
/** @highest_bank_bit: The bit of the highest DDR bank */
u32 highest_bank_bit;
};
#define A5XX_CP_CTXRECORD_MAGIC_REF 0x27C4BAFCUL
/* Size of each CP preemption record */
#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x10000
/* Size of the preemption counter block (in bytes) */
#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4)
/**
* struct a5xx_cp_preemption_record - CP context record for
* preemption.
* @magic: (00) Value at this offset must be equal to
* A5XX_CP_CTXRECORD_MAGIC_REF.
* @info: (04) Type of record. Written non-zero (usually) by CP.
* We must set this to zero for all ringbuffers.
* @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets.
* Written by CP when switching out. Not used on switch-in.
* We must initialize this to zero.
* @cntl: (12) RB_CNTL, saved and restored by CP.
* @rptr: (16) RB_RPTR, saved and restored by CP.
* @wptr: (20) RB_WPTR, saved and restored by CP.
* @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored.
* @rbase: (32) RB_BASE_LO|HI saved and restored.
* @counter: (40) Pointer to the preemption counter.
*/
struct a5xx_cp_preemption_record {
uint32_t magic;
uint32_t info;
uint32_t data;
uint32_t cntl;
uint32_t rptr;
uint32_t wptr;
uint64_t rptr_addr;
uint64_t rbase;
uint64_t counter;
};
#define A5XX_CP_SMMU_INFO_MAGIC_REF 0x3618CDA3UL
/**
* struct a5xx_cp_smmu_info - CP preemption SMMU info.
* @magic: (00) The value at this offset must be equal to
* A5XX_CP_SMMU_INFO_MAGIC_REF.
* @_pad4: (04) Reserved/padding
* @ttbr0: (08) Base address of the page table for the
* incoming context.
* @asid: (16) Address Space ID; the CP does not use this field.
* @context_idr: (20) Context Identification Register value.
*/
struct a5xx_cp_smmu_info {
uint32_t magic;
uint32_t _pad4;
uint64_t ttbr0;
uint32_t asid;
uint32_t context_idr;
};
void a5xx_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
unsigned int a5xx_num_registers(void);
void a5xx_crashdump_init(struct adreno_device *adreno_dev);
void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on);
#define A5XX_CP_RB_CNTL_DEFAULT ((1 << 27) | ((ilog2(4) << 8) & 0x1F00) | \
(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
/* GPMU interrupt multiplexor */
#define FW_INTR_INFO (0)
#define LLM_ACK_ERR_INTR (1)
#define ISENS_TRIM_ERR_INTR (2)
#define ISENS_ERR_INTR (3)
#define ISENS_IDLE_ERR_INTR (4)
#define ISENS_PWR_ON_ERR_INTR (5)
#define WDOG_EXPITED (31)
#define VALID_GPMU_IRQ (\
BIT(FW_INTR_INFO) | \
BIT(LLM_ACK_ERR_INTR) | \
BIT(ISENS_TRIM_ERR_INTR) | \
BIT(ISENS_ERR_INTR) | \
BIT(ISENS_IDLE_ERR_INTR) | \
BIT(ISENS_PWR_ON_ERR_INTR) | \
BIT(WDOG_EXPITED))
/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL */
#define STATE_OF_CHILD GENMASK(5, 4)
#define STATE_OF_CHILD_01 BIT(4)
#define STATE_OF_CHILD_11 (BIT(4) | BIT(5))
#define IDLE_FULL_LM_SLEEP BIT(0)
/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS */
#define WAKEUP_ACK BIT(1)
#define IDLE_FULL_ACK BIT(0)
/* A5XX_GPMU_GPMU_ISENSE_CTRL */
#define ISENSE_CGC_EN_DISABLE BIT(0)
/* A5XX_GPMU_TEMP_SENSOR_CONFIG */
#define GPMU_BCL_ENABLED BIT(4)
#define GPMU_LLM_ENABLED BIT(9)
#define GPMU_ISENSE_STATUS GENMASK(3, 0)
#define GPMU_ISENSE_END_POINT_CAL_ERR BIT(0)
#define AMP_CALIBRATION_RETRY_CNT 3
#define AMP_CALIBRATION_TIMEOUT 6
/* A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK */
#define VOLTAGE_INTR_EN BIT(0)
/* A5XX_GPMU_GPMU_PWR_THRESHOLD */
#define PWR_THRESHOLD_VALID 0x80000000
/* A5XX_GPMU_GPMU_SP_CLOCK_CONTROL */
#define CNTL_IP_CLK_ENABLE BIT(0)
/* AGC */
#define AGC_INIT_BASE A5XX_GPMU_DATA_RAM_BASE
#define AGC_INIT_MSG_MAGIC (AGC_INIT_BASE + 5)
#define AGC_MSG_BASE (AGC_INIT_BASE + 7)
#define AGC_MSG_STATE (AGC_MSG_BASE + 0)
#define AGC_MSG_COMMAND (AGC_MSG_BASE + 1)
#define AGC_MSG_PAYLOAD_SIZE (AGC_MSG_BASE + 3)
#define AGC_MSG_PAYLOAD (AGC_MSG_BASE + 5)
#define AGC_INIT_MSG_VALUE 0xBABEFACE
#define AGC_POWER_CONFIG_PRODUCTION_ID 1
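/* The /4 below converts byte offsets within the AGC message to dword register offsets */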
#define AGC_LM_CONFIG (136/4)
#define AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE (1)
#define AGC_LM_CONFIG_ENABLE_ERROR (3 << 4)
#define AGC_LM_CONFIG_ISENSE_ENABLE (1 << 4)
#define AGC_THROTTLE_SEL_DCS (1 << 8)
#define AGC_THROTTLE_DISABLE (2 << 8)
#define AGC_LLM_ENABLED (1 << 16)
#define AGC_GPU_VERSION_MASK GENMASK(18, 17)
#define AGC_GPU_VERSION_SHIFT 17
#define AGC_BCL_DISABLED (1 << 24)
#define AGC_LEVEL_CONFIG (140/4)
#define LM_DCVS_LIMIT 1
/* FW file tags */
#define GPMU_FIRMWARE_ID 2
#define GPMU_SEQUENCE_ID 3
#define GPMU_INST_RAM_SIZE 0xFFF
#define HEADER_MAJOR 1
#define HEADER_MINOR 2
#define HEADER_DATE 3
#define HEADER_TIME 4
#define HEADER_SEQUENCE 5
#define MAX_HEADER_SIZE 10
#define LM_SEQUENCE_ID 1
#define MAX_SEQUENCE_ID 3
#define GPMU_ISENSE_SAVE (A5XX_GPMU_DATA_RAM_BASE + 200/4)
/* LM defaults */
#define LM_DEFAULT_LIMIT 6000
#define A530_DEFAULT_LEAKAGE 0x004E001A
/**
* to_a5xx_core - return the a5xx specific GPU core struct
* @adreno_dev: An Adreno GPU device handle
*
* Returns:
* A pointer to the a5xx specific GPU core struct
*/
static inline const struct adreno_a5xx_core *
to_a5xx_core(struct adreno_device *adreno_dev)
{
const struct adreno_gpu_core *core = adreno_dev->gpucore;
return container_of(core, struct adreno_a5xx_core, base);
}
/* Preemption functions */
void a5xx_preemption_trigger(struct adreno_device *adreno_dev);
void a5xx_preemption_schedule(struct adreno_device *adreno_dev);
void a5xx_preemption_start(struct adreno_device *adreno_dev);
int a5xx_preemption_init(struct adreno_device *adreno_dev);
/**
* a5xx_preemption_post_ibsubmit - Insert commands following a submission
* @adreno_dev: Adreno GPU handle
* @cmds: Pointer to the ringbuffer to insert opcodes
*
* Return: The number of opcodes written to @cmds
*/
u32 a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds);
/**
* a5xx_preemption_pre_ibsubmit - Insert opcodes before a submission
* @adreno_dev: Adreno GPU handle
* @rb: The ringbuffer being written
* @drawctxt: The draw context being written
* @cmds: Pointer to the ringbuffer to insert opcodes
*
* Return: The number of opcodes written to @cmds
*/
u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds);
void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit);
u64 a5xx_read_alwayson(struct adreno_device *adreno_dev);
extern const struct adreno_perfcounters adreno_a5xx_perfcounters;
/**
* a5xx_ringbuffer_init - Initialize the ringbuffers
* @adreno_dev: An Adreno GPU handle
*
* Initialize the ringbuffer(s) for a5xx.
* Return: 0 on success or negative on failure
*/
int a5xx_ringbuffer_init(struct adreno_device *adreno_dev);
/**
* a5xx_ringbuffer_addcmds - Submit a command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @rb: Pointer to the ringbuffer to submit on
* @drawctxt: Pointer to the draw context for the submission, or NULL for
* internal submissions
* @flags: Flags for the submission
* @in: Commands to write to the ringbuffer
* @dwords: Size of @in (in dwords)
* @timestamp: Timestamp for the submission
* @time: Optional pointer to a submit time structure
*
* Submit a command to the ringbuffer.
* Return: 0 on success or negative on failure
*/
int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time);
/**
* a5xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @cmdobj: Pointer to a user command object
* @flags: Internal submit flags
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time);
int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time, bool sync);
static inline bool a5xx_has_gpmu(struct adreno_device *adreno_dev)
{
return (adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev));
}
#ifdef CONFIG_QCOM_KGSL_CORESIGHT
void a5xx_coresight_init(struct adreno_device *device);
#else
static inline void a5xx_coresight_init(struct adreno_device *device) { }
#endif
#endif


@ -0,0 +1,208 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_coresight.h"
static struct adreno_coresight_register a5xx_coresight_registers[] = {
{ A5XX_RBBM_CFG_DBGBUS_SEL_A },
{ A5XX_RBBM_CFG_DBGBUS_SEL_B },
{ A5XX_RBBM_CFG_DBGBUS_SEL_C },
{ A5XX_RBBM_CFG_DBGBUS_SEL_D },
{ A5XX_RBBM_CFG_DBGBUS_CNTLT },
{ A5XX_RBBM_CFG_DBGBUS_CNTLM },
{ A5XX_RBBM_CFG_DBGBUS_OPL },
{ A5XX_RBBM_CFG_DBGBUS_OPE },
{ A5XX_RBBM_CFG_DBGBUS_IVTL_0 },
{ A5XX_RBBM_CFG_DBGBUS_IVTL_1 },
{ A5XX_RBBM_CFG_DBGBUS_IVTL_2 },
{ A5XX_RBBM_CFG_DBGBUS_IVTL_3 },
{ A5XX_RBBM_CFG_DBGBUS_MASKL_0 },
{ A5XX_RBBM_CFG_DBGBUS_MASKL_1 },
{ A5XX_RBBM_CFG_DBGBUS_MASKL_2 },
{ A5XX_RBBM_CFG_DBGBUS_MASKL_3 },
{ A5XX_RBBM_CFG_DBGBUS_BYTEL_0 },
{ A5XX_RBBM_CFG_DBGBUS_BYTEL_1 },
{ A5XX_RBBM_CFG_DBGBUS_IVTE_0 },
{ A5XX_RBBM_CFG_DBGBUS_IVTE_1 },
{ A5XX_RBBM_CFG_DBGBUS_IVTE_2 },
{ A5XX_RBBM_CFG_DBGBUS_IVTE_3 },
{ A5XX_RBBM_CFG_DBGBUS_MASKE_0 },
{ A5XX_RBBM_CFG_DBGBUS_MASKE_1 },
{ A5XX_RBBM_CFG_DBGBUS_MASKE_2 },
{ A5XX_RBBM_CFG_DBGBUS_MASKE_3 },
{ A5XX_RBBM_CFG_DBGBUS_NIBBLEE },
{ A5XX_RBBM_CFG_DBGBUS_PTRC0 },
{ A5XX_RBBM_CFG_DBGBUS_PTRC1 },
{ A5XX_RBBM_CFG_DBGBUS_LOADREG },
{ A5XX_RBBM_CFG_DBGBUS_IDX },
{ A5XX_RBBM_CFG_DBGBUS_CLRC },
{ A5XX_RBBM_CFG_DBGBUS_LOADIVT },
{ A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC },
{ A5XX_RBBM_CFG_DBGBUS_OVER },
{ A5XX_RBBM_CFG_DBGBUS_COUNT0 },
{ A5XX_RBBM_CFG_DBGBUS_COUNT1 },
{ A5XX_RBBM_CFG_DBGBUS_COUNT2 },
{ A5XX_RBBM_CFG_DBGBUS_COUNT3 },
{ A5XX_RBBM_CFG_DBGBUS_COUNT4 },
{ A5XX_RBBM_CFG_DBGBUS_COUNT5 },
{ A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR },
{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 },
{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 },
{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 },
{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 },
{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 },
{ A5XX_RBBM_CFG_DBGBUS_MISR0 },
{ A5XX_RBBM_CFG_DBGBUS_MISR1 },
{ A5XX_RBBM_AHB_DBG_CNTL },
{ A5XX_RBBM_READ_AHB_THROUGH_DBG },
{ A5XX_RBBM_DBG_LO_HI_GPIO },
{ A5XX_RBBM_EXT_TRACE_BUS_CNTL },
{ A5XX_RBBM_EXT_VBIF_DBG_CNTL },
};
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a5xx_coresight_registers[7]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic,
&a5xx_coresight_registers[33]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr,
&a5xx_coresight_registers[41]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0,
&a5xx_coresight_registers[42]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1,
&a5xx_coresight_registers[43]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2,
&a5xx_coresight_registers[44]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3,
&a5xx_coresight_registers[45]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4,
&a5xx_coresight_registers[46]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]);
static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]);
static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg,
&a5xx_coresight_registers[50]);
static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]);
static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]);
static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]);
static struct attribute *a5xx_coresight_attrs[] = {
&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
&coresight_attr_cfg_dbgbus_opl.attr.attr,
&coresight_attr_cfg_dbgbus_ope.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
&coresight_attr_cfg_dbgbus_idx.attr.attr,
&coresight_attr_cfg_dbgbus_clrc.attr.attr,
&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
&coresight_attr_cfg_dbgbus_event_logic.attr.attr,
&coresight_attr_cfg_dbgbus_over.attr.attr,
&coresight_attr_cfg_dbgbus_count0.attr.attr,
&coresight_attr_cfg_dbgbus_count1.attr.attr,
&coresight_attr_cfg_dbgbus_count2.attr.attr,
&coresight_attr_cfg_dbgbus_count3.attr.attr,
&coresight_attr_cfg_dbgbus_count4.attr.attr,
&coresight_attr_cfg_dbgbus_count5.attr.attr,
&coresight_attr_cfg_dbgbus_trace_addr.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf0.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf3.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf4.attr.attr,
&coresight_attr_cfg_dbgbus_misr0.attr.attr,
&coresight_attr_cfg_dbgbus_misr1.attr.attr,
&coresight_attr_ahb_dbg_cntl.attr.attr,
&coresight_attr_read_ahb_through_dbg.attr.attr,
&coresight_attr_dbg_lo_hi_gpio.attr.attr,
&coresight_attr_ext_trace_bus_cntl.attr.attr,
&coresight_attr_ext_vbif_dbg_cntl.attr.attr,
NULL,
};
static const struct attribute_group a5xx_coresight_group = {
.attrs = a5xx_coresight_attrs,
};
static const struct attribute_group *a5xx_coresight_groups[] = {
&a5xx_coresight_group,
NULL,
};
static const struct adreno_coresight a5xx_coresight = {
.registers = a5xx_coresight_registers,
.count = ARRAY_SIZE(a5xx_coresight_registers),
.groups = a5xx_coresight_groups,
};
void a5xx_coresight_init(struct adreno_device *adreno_dev)
{
adreno_coresight_add_device(adreno_dev, "coresight-gfx",
&a5xx_coresight, &adreno_dev->gx_coresight);
}

File diff suppressed because it is too large


@ -0,0 +1,695 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_perfcounter.h"
#include "adreno_pm4types.h"
#include "kgsl_device.h"
#define VBIF2_PERF_CNT_SEL_MASK 0x7F
/* offset of clear register from select register */
#define VBIF2_PERF_CLR_REG_SEL_OFF 8
/* offset of enable register from select register */
#define VBIF2_PERF_EN_REG_SEL_OFF 16
/* offset of clear register from the enable register */
#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8
static void a5xx_counter_load(struct adreno_device *adreno_dev,
struct adreno_perfcount_register *reg)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int index = reg->load_bit / 32;
u32 enable = BIT(reg->load_bit & 31);
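/*
 * load_bit selects both the LOAD_CMD register (load_bit / 32) and the
 * bit to set within that register (load_bit % 32).
 */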
kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_LO,
lower_32_bits(reg->value));
kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_VALUE_HI,
upper_32_bits(reg->value));
kgsl_regwrite(device, A5XX_RBBM_PERFCTR_LOAD_CMD0 + index, enable);
}
static u64 a5xx_counter_read_norestore(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
u32 hi, lo;
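/*
 * "norestore" counters are not reloaded into hardware by
 * a5xx_counter_load, so the accumulated software value is added to the
 * raw hardware reading below.
 */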
kgsl_regread(device, reg->offset, &lo);
kgsl_regread(device, reg->offset_hi, &hi);
return ((((u64) hi) << 32) | lo) + reg->value;
}
static int a5xx_counter_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
kgsl_regwrite(device, reg->select, countable);
reg->value = 0;
return 0;
}
static int a5xx_counter_inline_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0];
u32 cmds[3];
int ret;
if (!(device->state == KGSL_STATE_ACTIVE))
return a5xx_counter_enable(adreno_dev, group, counter,
countable);
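/*
 * Three dwords: a CP_WAIT_FOR_IDLE followed by a type4 register write
 * of the countable into the counter's select register.
 */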
cmds[0] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
cmds[1] = cp_type4_packet(reg->select, 1);
cmds[2] = countable;
/* submit to highest priority RB always */
ret = a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL,
F_NOTPROTECTED, cmds, 3, 0, NULL);
if (ret)
return ret;
/*
* schedule dispatcher to make sure rb[0] is run, because
* if the current RB is not rb[0] and gpu is idle then
* rb[0] will not get scheduled to run
*/
if (adreno_dev->cur_rb != rb)
adreno_dispatcher_schedule(device);
/* wait for the above commands submitted to complete */
ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp,
ADRENO_IDLE_TIMEOUT);
if (ret) {
/*
* If we were woken up because of cancelling rb events
* either due to soft reset or adreno_stop, ignore the
* error and return 0 here. The perfcounter is already
* set up in software and it will be programmed in
* hardware when we wake up or come up after soft reset,
* by adreno_perfcounter_restore.
*/
if (ret == -EAGAIN)
ret = 0;
else
dev_err(device->dev,
"Perfcounter %s/%u/%u start via commands failed %d\n",
group->name, counter, countable, ret);
}
if (!ret)
reg->value = 0;
return ret;
}
static int a5xx_counter_rbbm_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
if (adreno_is_a540(adreno_dev) && countable == A5XX_RBBM_ALWAYS_COUNT)
return -EINVAL;
return a5xx_counter_inline_enable(adreno_dev, group, counter,
countable);
}
static u64 a5xx_counter_read(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
u32 hi, lo;
kgsl_regread(device, reg->offset, &lo);
kgsl_regread(device, reg->offset_hi, &hi);
return (((u64) hi) << 32) | lo;
}
static int a5xx_counter_vbif_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
if (countable > VBIF2_PERF_CNT_SEL_MASK)
return -EINVAL;
/*
* Write 1, followed by 0 to CLR register for
* clearing the counter
*/
kgsl_regwrite(device,
reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1);
kgsl_regwrite(device,
reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0);
kgsl_regwrite(device,
reg->select, countable & VBIF2_PERF_CNT_SEL_MASK);
/* enable reg is 16 DWORDS before select reg */
kgsl_regwrite(device,
reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
kgsl_regwrite(device, reg->select, countable);
reg->value = 0;
return 0;
}
static int a5xx_counter_vbif_pwr_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
/*
* Write 1, followed by 0 to CLR register for
* clearing the counter
*/
kgsl_regwrite(device, reg->select +
VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1);
kgsl_regwrite(device, reg->select +
VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0);
kgsl_regwrite(device, reg->select, 1);
reg->value = 0;
return 0;
}
static int a5xx_counter_alwayson_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
return 0;
}
static u64 a5xx_counter_alwayson_read(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter)
{
struct adreno_perfcount_register *reg = &group->regs[counter];
return a5xx_read_alwayson(adreno_dev) + reg->value;
}
static int a5xx_counter_pwr_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
kgsl_regwrite(device, reg->select, countable);
kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1);
reg->value = 0;
return 0;
}
static int a5xx_counter_pwr_gpmu_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
unsigned int shift = (counter << 3) % (sizeof(unsigned int) * 8);
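/*
 * Each GPMU power counter select register packs four 8-bit countable
 * fields, so counter N uses byte lane (N % 4) of its select register.
 */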
if (adreno_is_a530(adreno_dev)) {
if (countable > 43)
return -EINVAL;
} else if (adreno_is_a540(adreno_dev)) {
if (countable > 47)
return -EINVAL;
}
kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1);
reg->value = 0;
return 0;
}
static int a5xx_counter_pwr_alwayson_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
unsigned int counter, unsigned int countable)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_perfcount_register *reg = &group->regs[counter];
kgsl_regwrite(device, A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1);
reg->value = 0;
return 0;
}
static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO,
A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO,
A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO,
A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO,
A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO,
A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO,
A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO,
A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO,
A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = {
/*
* A5XX_RBBM_PERFCTR_RBBM_0 is used for frequency scaling and omitted
* from the pool of available counters
*/
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO,
A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO,
A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO,
A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_pc[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO,
A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO,
A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO,
A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO,
A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO,
A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO,
A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO,
A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO,
A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO,
A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO,
A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO,
A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO,
A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO,
A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO,
A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO,
A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO,
A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO,
A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO,
A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO,
A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO,
A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO,
A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO,
A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO,
A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO,
A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO,
A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO,
A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO,
A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO,
A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO,
A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO,
A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO,
A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO,
A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_tse[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO,
A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO,
A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO,
A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO,
A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_ras[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO,
A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO,
A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO,
A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO,
A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_uche[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO,
A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO,
A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO,
A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO,
A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO,
A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO,
A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO,
A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO,
A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_tp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO,
A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO,
A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO,
A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO,
A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO,
A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO,
A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO,
A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO,
A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_sp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO,
A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO,
A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO,
A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO,
A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO,
A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO,
A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO,
A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO,
A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO,
A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO,
A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO,
A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO,
A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 },
};
static struct adreno_perfcount_register a5xx_perfcounters_rb[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO,
A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO,
A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO,
A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO,
A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO,
A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO,
A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO,
A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO,
A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 },
};
static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO,
A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO,
A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 },
};
static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO,
A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, A5XX_GRAS_PERFCTR_LRZ_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO,
A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO,
A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO,
A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO,
A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO,
A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO,
A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO,
A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0,
A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1,
A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2,
A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3,
A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 },
};
static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0,
A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1,
A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2,
A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 },
};
static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO,
A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO,
A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO,
A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO,
A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO,
A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO,
A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO,
A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO,
A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO,
A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO,
A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO,
A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO,
A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO,
A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO,
A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO,
A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO,
A5XX_UCHE_POWER_COUNTER_0_HI, -1,
A5XX_UCHE_POWERCTR_UCHE_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO,
A5XX_UCHE_POWER_COUNTER_1_HI, -1,
A5XX_UCHE_POWERCTR_UCHE_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO,
A5XX_UCHE_POWER_COUNTER_2_HI, -1,
A5XX_UCHE_POWERCTR_UCHE_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO,
A5XX_UCHE_POWER_COUNTER_3_HI, -1,
A5XX_UCHE_POWERCTR_UCHE_SEL_3 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO,
A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO,
A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO,
A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO,
A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO,
A5XX_GPMU_POWER_COUNTER_0_HI, -1,
A5XX_GPMU_POWER_COUNTER_SELECT_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO,
A5XX_GPMU_POWER_COUNTER_1_HI, -1,
A5XX_GPMU_POWER_COUNTER_SELECT_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO,
A5XX_GPMU_POWER_COUNTER_2_HI, -1,
A5XX_GPMU_POWER_COUNTER_SELECT_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO,
A5XX_GPMU_POWER_COUNTER_3_HI, -1,
A5XX_GPMU_POWER_COUNTER_SELECT_0 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO,
A5XX_GPMU_POWER_COUNTER_4_HI, -1,
A5XX_GPMU_POWER_COUNTER_SELECT_1 },
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO,
A5XX_GPMU_POWER_COUNTER_5_HI, -1,
A5XX_GPMU_POWER_COUNTER_SELECT_1 },
};
static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = {
{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO,
A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 },
};
#define A5XX_PERFCOUNTER_GROUP(offset, name, enable, read, load) \
ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name, enable, read, load)
#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags, enable, read, load) \
ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags, enable, \
read, load)
#define A5XX_POWER_COUNTER_GROUP(offset, name, enable, read) \
[KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { a5xx_pwrcounters_##name, \
ARRAY_SIZE(a5xx_pwrcounters_##name), __stringify(name##_pwr), 0, \
enable, read, NULL }
#define A5XX_REGULAR_PERFCOUNTER_GROUP(offset, name) \
A5XX_PERFCOUNTER_GROUP(offset, name, a5xx_counter_inline_enable, \
a5xx_counter_read, a5xx_counter_load)
static struct adreno_perfcount_group a5xx_perfcounter_groups
[KGSL_PERFCOUNTER_GROUP_MAX] = {
A5XX_REGULAR_PERFCOUNTER_GROUP(CP, cp),
A5XX_PERFCOUNTER_GROUP(RBBM, rbbm,
a5xx_counter_rbbm_enable, a5xx_counter_read, a5xx_counter_load),
A5XX_REGULAR_PERFCOUNTER_GROUP(PC, pc),
A5XX_REGULAR_PERFCOUNTER_GROUP(VFD, vfd),
A5XX_REGULAR_PERFCOUNTER_GROUP(HLSQ, hlsq),
A5XX_REGULAR_PERFCOUNTER_GROUP(VPC, vpc),
A5XX_REGULAR_PERFCOUNTER_GROUP(CCU, ccu),
A5XX_REGULAR_PERFCOUNTER_GROUP(CMP, cmp),
A5XX_REGULAR_PERFCOUNTER_GROUP(TSE, tse),
A5XX_REGULAR_PERFCOUNTER_GROUP(RAS, ras),
A5XX_REGULAR_PERFCOUNTER_GROUP(LRZ, lrz),
A5XX_REGULAR_PERFCOUNTER_GROUP(UCHE, uche),
A5XX_REGULAR_PERFCOUNTER_GROUP(TP, tp),
A5XX_REGULAR_PERFCOUNTER_GROUP(SP, sp),
A5XX_REGULAR_PERFCOUNTER_GROUP(RB, rb),
A5XX_REGULAR_PERFCOUNTER_GROUP(VSC, vsc),
A5XX_PERFCOUNTER_GROUP(VBIF, vbif,
a5xx_counter_vbif_enable, a5xx_counter_read_norestore, NULL),
A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
ADRENO_PERFCOUNTER_GROUP_FIXED,
a5xx_counter_vbif_pwr_enable,
a5xx_counter_read_norestore, NULL),
A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
ADRENO_PERFCOUNTER_GROUP_FIXED,
a5xx_counter_alwayson_enable, a5xx_counter_alwayson_read, NULL),
A5XX_POWER_COUNTER_GROUP(SP, sp,
a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(TP, tp,
a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(RB, rb,
a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(CCU, ccu,
a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(UCHE, uche,
a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(CP, cp,
a5xx_counter_pwr_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(GPMU, gpmu,
a5xx_counter_pwr_gpmu_enable, a5xx_counter_read_norestore),
A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson,
a5xx_counter_pwr_alwayson_enable, a5xx_counter_read_norestore),
};
const struct adreno_perfcounters adreno_a5xx_perfcounters = {
a5xx_perfcounter_groups,
ARRAY_SIZE(a5xx_perfcounter_groups),
};


@ -0,0 +1,548 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2014-2017,2021 The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_pm4types.h"
#include "adreno_trace.h"
#define PREEMPT_RECORD(_field) \
offsetof(struct a5xx_cp_preemption_record, _field)
#define PREEMPT_SMMU_RECORD(_field) \
offsetof(struct a5xx_cp_smmu_info, _field)
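/*
 * offsetof() gives the byte offset of a record field so the
 * kgsl_sharedmem_write* helpers below can patch individual fields of
 * the preemption and SMMU info records in place.
 */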
static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
unsigned int wptr;
unsigned long flags;
spin_lock_irqsave(&rb->preempt_lock, flags);
kgsl_regread(device, A5XX_CP_RB_WPTR, &wptr);
if (wptr != rb->wptr) {
kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->wptr);
/*
* In case something got submitted while preemption was
* ongoing, reset the timer.
*/
reset_timer = true;
}
if (reset_timer)
rb->dispatch_q.expires = jiffies +
msecs_to_jiffies(adreno_drawobj_timeout);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
}
static void _a5xx_preemption_done(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
/*
* In the very unlikely case that the power is off, do nothing - the
* state will be reset on power up and everybody will be happy
*/
if (!kgsl_state_is_awake(device))
return;
kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);
if (status != 0) {
dev_err(device->dev,
"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
status, adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
/* Set a fault and restart */
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
return;
}
del_timer_sync(&adreno_dev->preempt.timer);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, 0, 0);
/* Clean up all the bits */
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr for the new command queue */
_update_wptr(adreno_dev, true);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
/* Clear the preempt state */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
}
static void _a5xx_preemption_fault(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
/*
* If the power is on check the preemption status one more time - if it
* was successful then just transition to the complete state
*/
if (kgsl_state_is_awake(device)) {
kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);
if (status == 0) {
adreno_set_preempt_state(adreno_dev,
ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(device);
return;
}
}
dev_err(device->dev,
"Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n",
adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
}
static void _a5xx_preemption_worker(struct work_struct *work)
{
struct adreno_preemption *preempt = container_of(work,
struct adreno_preemption, work);
struct adreno_device *adreno_dev = container_of(preempt,
struct adreno_device, preempt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/* Need to take the mutex to make sure that the power stays on */
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
_a5xx_preemption_fault(adreno_dev);
mutex_unlock(&device->mutex);
}
/* Find the highest priority active ringbuffer */
static struct adreno_ringbuffer *a5xx_next_ringbuffer(
struct adreno_device *adreno_dev)
{
struct adreno_ringbuffer *rb;
unsigned long flags;
unsigned int i;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
bool empty;
spin_lock_irqsave(&rb->preempt_lock, flags);
empty = adreno_rb_empty(rb);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!empty)
return rb;
}
return NULL;
}
void a5xx_preemption_trigger(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *next;
uint64_t ttbr0;
unsigned int contextidr;
unsigned long flags;
/* Put ourselves into a possible trigger state */
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
return;
/* Get the next ringbuffer to preempt in */
next = a5xx_next_ringbuffer(adreno_dev);
/*
* Nothing to do if every ringbuffer is empty or if the current
* ringbuffer is the only active one
*/
if (next == NULL || next == adreno_dev->cur_rb) {
/*
* Update any critical things that might have been skipped while
* we were looking for a new ringbuffer
*/
if (next != NULL) {
_update_wptr(adreno_dev, false);
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
return;
}
/* Turn off the dispatcher timer */
del_timer(&adreno_dev->dispatcher.timer);
/*
* This is the most critical section - we need to take care not to race
* until we have programmed the CP for the switch
*/
spin_lock_irqsave(&next->preempt_lock, flags);
/* Get the pagetable from the pagetable info. */
kgsl_sharedmem_readq(device->scratch, &ttbr0,
SCRATCH_RB_OFFSET(next->id, ttbr0));
kgsl_sharedmem_readl(device->scratch, &contextidr,
SCRATCH_RB_OFFSET(next->id, contextidr));
kgsl_sharedmem_writel(next->preemption_desc,
PREEMPT_RECORD(wptr), next->wptr);
spin_unlock_irqrestore(&next->preempt_lock, flags);
/* And write it to the smmu info */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), contextidr);
}
kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
lower_32_bits(next->preemption_desc->gpuaddr));
kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
upper_32_bits(next->preemption_desc->gpuaddr));
adreno_dev->next_rb = next;
/* Start the timer to detect a stuck preemption */
mod_timer(&adreno_dev->preempt.timer,
jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
1, 0);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
/* Trigger the preemption */
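/*
 * The CP is expected to clear CONTEXT_SWITCH_CNTL once the switch
 * completes; a5xx_preempt_callback() and _a5xx_preemption_done() check
 * for a non-zero value to detect a stuck preemption.
 */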
kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
}
void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
return;
kgsl_regread(device, A5XX_CP_CONTEXT_SWITCH_CNTL, &status);
if (status != 0) {
dev_err(KGSL_DEVICE(adreno_dev)->dev,
"preempt interrupt with non-zero status: %X\n",
status);
/*
* Under the assumption that this is a race between the
* interrupt and the register, schedule the worker to clean up.
* If the status still hasn't resolved itself by the time we get
* there then we have to assume something bad happened
*/
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(device);
return;
}
del_timer(&adreno_dev->preempt.timer);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id, 0, 0);
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr if it changed while preemption was ongoing */
_update_wptr(adreno_dev, true);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
a5xx_preemption_trigger(adreno_dev);
}
void a5xx_preemption_schedule(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (!adreno_is_preemption_enabled(adreno_dev))
return;
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
_a5xx_preemption_done(adreno_dev);
a5xx_preemption_trigger(adreno_dev);
mutex_unlock(&device->mutex);
}
u32 a5xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt, u32 *cmds)
{
unsigned int *cmds_orig = cmds;
uint64_t gpuaddr = rb->preemption_desc->gpuaddr;
unsigned int preempt_style = 0;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (drawctxt) {
/*
* Preemption from secure to unsecure needs Zap shader to be
* run to clear all secure content. CP does not know during
* preemption if it is switching between secure and unsecure
* contexts so restrict Secure contexts to be preempted at
* ringbuffer level.
*/
if (drawctxt->base.flags & KGSL_CONTEXT_SECURE)
preempt_style = KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER;
else
preempt_style = FIELD_GET(KGSL_CONTEXT_PREEMPT_STYLE_MASK,
drawctxt->base.flags);
}
/*
* CP_PREEMPT_ENABLE_GLOBAL (global preemption) can only be set by KMD
* in the ringbuffer:
* 1) Set global preemption to 0x0 to disable global preemption.
* Only RB level preemption is allowed in this mode.
* 2) Set global preemption to defer (0x2) for finegrain preemption.
* When global preemption is set to defer (0x2),
* CP_PREEMPT_ENABLE_LOCAL (local preemption) determines the
* preemption point. Local preemption can be enabled by both
* UMD (within an IB) and KMD.
*/
*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
*cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
? 2 : 0);
/* Turn CP protection OFF */
cmds += cp_protected_mode(adreno_dev, cmds, 0);
/*
* CP during context switch will save context switch info to
* a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
*/
*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
*cmds++ = lower_32_bits(gpuaddr);
*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
*cmds++ = upper_32_bits(gpuaddr);
/* Turn CP protection ON */
cmds += cp_protected_mode(adreno_dev, cmds, 1);
/*
* Enable local preemption for finegrain preemption in case of
* a misbehaving IB
*/
if (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN) {
*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
*cmds++ = 1;
} else {
*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
*cmds++ = 0;
}
/* Enable CP_CONTEXT_SWITCH_YIELD packets in the IB2s */
*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
*cmds++ = 2;
return (unsigned int) (cmds - cmds_orig);
}
unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
unsigned int *cmds)
{
int dwords = 0;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
cmds[dwords++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
/* Write NULL to the address to skip the data write */
dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], 0x0);
cmds[dwords++] = 1;
/* generate interrupt on preemption completion */
cmds[dwords++] = 1;
return dwords;
}
void a5xx_preemption_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *rb;
unsigned int i;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
/* Force the state to be clear */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
/* Only set up smmu info when per-process pagetables are enabled */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
/* smmu_info is allocated and mapped in a5xx_preemption_init */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(magic), A5XX_CP_SMMU_INFO_MAGIC_REF);
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
/* The CP doesn't use the asid record, so poison it */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), 0);
kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
lower_32_bits(iommu->smmu_info->gpuaddr));
kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
upper_32_bits(iommu->smmu_info->gpuaddr));
}
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
/*
* preemption_desc is allocated and mapped at init time,
* so no need to check sharedmem_writel return value
*/
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(rptr), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(wptr), 0);
adreno_ringbuffer_set_pagetable(device, rb,
device->mmu.defaultpagetable);
}
}
static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, uint64_t counteraddr)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (IS_ERR_OR_NULL(rb->preemption_desc))
rb->preemption_desc = kgsl_allocate_global(device,
A5XX_CP_CTXRECORD_SIZE_IN_BYTES, SZ_16K, 0,
KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
if (IS_ERR(rb->preemption_desc))
return PTR_ERR(rb->preemption_desc);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(info), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(data), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(cntl), A5XX_CP_RB_CNTL_DEFAULT);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(rptr), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(wptr), 0);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(device,
rb->id, rptr));
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(counter), counteraddr);
return 0;
}
int a5xx_preemption_init(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_preemption *preempt = &adreno_dev->preempt;
struct adreno_ringbuffer *rb;
int ret;
unsigned int i;
uint64_t addr;
/* We depend on the IOMMU for preemption to work on the CP side */
if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
return -ENODEV;
INIT_WORK(&preempt->work, _a5xx_preemption_worker);
/* Allocate mem for storing preemption counters */
if (IS_ERR_OR_NULL(preempt->scratch))
preempt->scratch = kgsl_allocate_global(device,
adreno_dev->num_ringbuffers *
A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0, 0,
"preemption_counters");
ret = PTR_ERR_OR_ZERO(preempt->scratch);
if (ret)
return ret;
addr = preempt->scratch->gpuaddr;
/* Allocate mem for storing preemption switch record */
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
ret = a5xx_preemption_ringbuffer_init(adreno_dev, rb, addr);
if (ret)
return ret;
addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
}
/* Allocate mem for storing preemption smmu record */
if (kgsl_mmu_is_perprocess(&device->mmu) && IS_ERR_OR_NULL(iommu->smmu_info))
iommu->smmu_info = kgsl_allocate_global(device, PAGE_SIZE, 0,
KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
"smmu_info");
if (IS_ERR(iommu->smmu_info))
return PTR_ERR(iommu->smmu_info);
set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
return 0;
}

View File

@ -0,0 +1,530 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a5xx.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"
static int a5xx_rb_pagetable_switch(struct kgsl_device *device,
struct adreno_context *drawctxt,
struct adreno_ringbuffer *rb,
struct kgsl_pagetable *pagetable, u32 *cmds)
{
u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
u32 id = drawctxt ? drawctxt->base.id : 0;
if (pagetable == device->mmu.defaultpagetable)
return 0;
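/* Switch the SMMU to the new pagetable (TTBR0) and context id via the CP */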
cmds[0] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
cmds[1] = lower_32_bits(ttbr0);
cmds[2] = upper_32_bits(ttbr0);
cmds[3] = id;
cmds[4] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
cmds[5] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
cmds[6] = cp_type4_packet(A5XX_CP_CNTL, 1);
cmds[7] = 1;
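/* Record the new TTBR0 and context id in the per-ringbuffer scratch memory */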
cmds[8] = cp_type7_packet(CP_MEM_WRITE, 5);
cmds[9] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[10] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[11] = lower_32_bits(ttbr0);
cmds[12] = upper_32_bits(ttbr0);
cmds[13] = id;
cmds[14] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
cmds[15] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
cmds[16] = cp_type4_packet(A5XX_CP_CNTL, 1);
cmds[17] = 0;
return 18;
}
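/*
 * soptimestamp is written when the CP starts processing a command (start of
 * pipeline) and eoptimestamp once it has finished (end of pipeline), tracked
 * both per ringbuffer and per context.
 */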
#define RB_SOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
#define RB_EOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
int a5xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time, bool sync)
{
struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned long flags;
adreno_get_submit_time(adreno_dev, rb, time);
adreno_profile_submit_time(time);
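/*
 * For synchronous submissions ask the CP to write its current read pointer
 * back to the per-ringbuffer scratch so the CPU can track CP progress
 */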
if (sync) {
u32 *cmds = adreno_ringbuffer_allocspace(rb, 3);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2);
cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
rptr));
cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
rptr));
}
spin_lock_irqsave(&rb->preempt_lock, flags);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
if (adreno_dev->cur_rb == rb) {
kgsl_pwrscale_busy(device);
kgsl_regwrite(device, A5XX_CP_RB_WPTR, rb->_wptr);
}
}
rb->wptr = rb->_wptr;
spin_unlock_irqrestore(&rb->preempt_lock, flags);
return 0;
}
int a5xx_ringbuffer_init(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int i;
if (IS_ERR_OR_NULL(device->scratch))
device->scratch = kgsl_allocate_global(device, PAGE_SIZE,
0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
"scratch");
if (IS_ERR(device->scratch))
return PTR_ERR(device->scratch);
adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
if (!adreno_preemption_feature_set(adreno_dev)) {
adreno_dev->num_ringbuffers = 1;
return adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[0], 0);
}
adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
int ret;
ret = adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[i], i);
if (ret)
return ret;
}
timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
a5xx_preemption_init(adreno_dev);
return 0;
}
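/* Worst-case number of dwords a5xx_ringbuffer_addcmds() adds around a submission */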
#define A5XX_SUBMIT_MAX 64
int a5xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
static u32 sequence;
u32 size = A5XX_SUBMIT_MAX + dwords;
u32 *cmds, index = 0;
u64 profile_gpuaddr;
u32 profile_dwords;
if (adreno_drawctxt_detached(drawctxt))
return -ENOENT;
if (adreno_gpu_fault(adreno_dev) != 0)
return -EPROTO;
rb->timestamp++;
if (drawctxt)
drawctxt->internal_timestamp = rb->timestamp;
cmds = adreno_ringbuffer_allocspace(rb, size);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
/* Identify the start of a command */
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
/* 14 dwords */
index += a5xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
&cmds[index]);
profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (drawctxt) {
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
}
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
if (IS_SECURE(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 1;
}
if (IS_NOTPROTECTED(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 0;
}
memcpy(&cmds[index], in, dwords << 2);
index += dwords;
if (IS_NOTPROTECTED(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 1;
}
/* 4 dwords */
profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (!adreno_is_a510(adreno_dev) &&
test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
&device->mmu.pfpolicy))
cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
/*
* Do a unique memory write from the GPU to assist in early detection of
* interrupt storms
*/
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, ref_wait_ts));
cmds[index++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, ref_wait_ts));
cmds[index++] = ++sequence;
/*
* If this is an internal command, just write the ringbuffer timestamp;
* otherwise write both the context and ringbuffer timestamps
*/
if (!drawctxt) {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
} else {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_FLUSH_TS;
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
}
if (IS_WFI(flags))
cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
if (IS_SECURE(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 0;
}
/* 5 dwords */
index += a5xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
/* Adjust the write pointer for the number of dwords we actually wrote */
rb->_wptr -= (size - index);
a5xx_ringbuffer_submit(rb, time,
!adreno_is_preemption_enabled(adreno_dev));
return 0;
}
static u32 a5xx_get_alwayson_counter(struct adreno_device *adreno_dev,
u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = A5XX_RBBM_ALWAYSON_COUNTER_LO;
/* On some targets the upper 32 bits are not reliable */
if (ADRENO_GPUREV(adreno_dev) > ADRENO_REV_A530)
cmds[1] |= (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
/* This is the maximum possible size for 64 bit targets */
#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
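/* With 4K pages this works out to 4096 / 16 = 256 profiling IB slots per page */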
static u64 a5xx_get_user_profiling_ib(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj,
u32 target_offset, u32 *cmds)
{
u32 offset, *ib, dwords;
u64 gpuaddr;
if (IS_ERR(rb->profile_desc))
return 0;
offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
ib = rb->profile_desc->hostptr + offset;
gpuaddr = rb->profile_desc->gpuaddr + offset;
dwords = a5xx_get_alwayson_counter(adreno_dev, ib,
cmdobj->profiling_buffer_gpuaddr + target_offset);
cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[1] = lower_32_bits(gpuaddr);
cmds[2] = upper_32_bits(gpuaddr);
cmds[3] = dwords;
rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
return 4;
}
static int a5xx_rb_context_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_pagetable *pagetable =
adreno_drawctxt_get_pagetable(drawctxt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int count = 0;
u32 cmds[32];
if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable)
count += a5xx_rb_pagetable_switch(device, drawctxt,
rb, pagetable, cmds);
cmds[count++] = cp_type7_packet(CP_NOP, 1);
cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type4_packet(A5XX_UCHE_INVALIDATE0, 1);
cmds[count++] = 0x12;
return a5xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
cmds, count, 0, NULL);
}
static int a5xx_drawctxt_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (rb->drawctxt_active == drawctxt)
return 0;
if (kgsl_context_detached(&drawctxt->base))
return -ENOENT;
if (!_kgsl_context_get(&drawctxt->base))
return -ENOENT;
trace_adreno_drawctxt_switch(rb, drawctxt);
a5xx_rb_context_switch(adreno_dev, rb, drawctxt);
/* Release the current drawctxt as soon as the new one is switched */
adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
rb, rb->timestamp);
rb->drawctxt_active = drawctxt;
return 0;
}
#define A5XX_USER_PROFILE_IB(dev, rb, cmdobj, cmds, field) \
a5xx_get_user_profiling_ib((dev), (rb), (cmdobj), \
offsetof(struct kgsl_drawobj_profiling_buffer, field), \
(cmds))
#define A5XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
a5xx_get_alwayson_counter((dev), (cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define A5XX_COMMAND_DWORDS 32
int a5xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
struct adreno_ringbuffer *rb = drawctxt->rb;
int ret = 0, numibs = 0, index = 0;
u32 *cmds;
/* Count the number of IBs (if we are not skipping) */
if (!IS_SKIP(flags)) {
struct list_head *tmp;
list_for_each(tmp, &cmdobj->cmdlist)
numibs++;
}
cmds = kmalloc((A5XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
if (!cmds) {
ret = -ENOMEM;
goto done;
}
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = START_IB_IDENTIFIER;
/* Kernel profiling: 4 dwords */
if (IS_KERNEL_PROFILE(flags))
index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
started);
/* User profiling: 4 dwords */
if (IS_USER_PROFILE(flags))
index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj,
&cmds[index], gpu_ticks_submitted);
if (numibs) {
struct kgsl_memobj_node *ib;
list_for_each_entry(ib, &cmdobj->cmdlist, node) {
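/* Skip this IB by turning the following IB packet into the payload of a NOP */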
if (ib->priv & MEMOBJ_SKIP ||
(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
&& !IS_PREAMBLE(flags)))
cmds[index++] = cp_type7_packet(CP_NOP, 4);
cmds[index++] =
cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(ib->gpuaddr);
cmds[index++] = upper_32_bits(ib->gpuaddr);
/* Double check that IB_PRIV is never set */
cmds[index++] = (ib->size >> 2) & 0xfffff;
}
}
/*
* SRM -- set render mode (e.g. binning, direct render, etc.)
* SRM is usually set by the UMD at the start of an IB to tell the CP the
* type of preemption.
* The KMD needs to set SRM to NULL to indicate to the CP that rendering
* by the IB is done.
*/
cmds[index++] = cp_type7_packet(CP_SET_RENDER_MODE, 5);
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = cp_type7_packet(CP_YIELD_ENABLE, 1);
cmds[index++] = 1;
/* 4 dwords */
if (IS_KERNEL_PROFILE(flags))
index += A5XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
retired);
/* 4 dwords */
if (IS_USER_PROFILE(flags))
index += A5XX_USER_PROFILE_IB(adreno_dev, rb, cmdobj,
&cmds[index], gpu_ticks_retired);
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = END_IB_IDENTIFIER;
ret = a5xx_drawctxt_switch(adreno_dev, rb, drawctxt);
/*
* In the unlikely event of an error in the drawctxt switch,
* treat it like a hang
*/
if (ret) {
/*
* It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
* the upper layers know how to handle it
*/
if (ret != -ENOSPC && ret != -ENOENT)
dev_err(device->dev,
"Unable to switch draw context: %d\n",
ret);
goto done;
}
adreno_drawobj_set_constraint(device, drawobj);
ret = a5xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
flags, cmds, index, drawobj->timestamp, time);
done:
trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
kfree(cmds);
return ret;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,450 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_A6XX_H_
#define _ADRENO_A6XX_H_
#include <linux/delay.h>
#include "a6xx_reg.h"
#include "adreno_a6xx_gmu.h"
#include "adreno_a6xx_rgmu.h"
extern const struct adreno_power_ops a6xx_gmu_power_ops;
extern const struct adreno_power_ops a6xx_rgmu_power_ops;
extern const struct adreno_power_ops a630_gmu_power_ops;
extern const struct adreno_power_ops a6xx_hwsched_power_ops;
struct a6xx_gpudev {
struct adreno_gpudev base;
int (*hfi_probe)(struct adreno_device *adreno_dev);
void (*hfi_remove)(struct adreno_device *adreno_dev);
void (*handle_watchdog)(struct adreno_device *adreno_dev);
};
extern const struct a6xx_gpudev adreno_a630_gpudev;
extern const struct a6xx_gpudev adreno_a6xx_gmu_gpudev;
extern const struct a6xx_gpudev adreno_a6xx_hwsched_gpudev;
/**
* struct a6xx_device - Container for the a6xx_device
*/
struct a6xx_device {
/** @gmu: Container for the a6xx GMU device */
struct a6xx_gmu_device gmu;
/** @rgmu: Container for the a6xx rGMU device */
struct a6xx_rgmu_device rgmu;
/** @adreno_dev: Container for the generic adreno device */
struct adreno_device adreno_dev;
};
/**
* struct adreno_a6xx_core - a6xx specific GPU core definitions
*/
struct adreno_a6xx_core {
/** @base: Container for the generic GPU definitions */
struct adreno_gpu_core base;
/** @gmu_major: The maximum GMU version supported by the core */
u32 gmu_major;
/** @gmu_minor: The minimum GMU version supported by the core */
u32 gmu_minor;
/** @prim_fifo_threshold: target specific value for PC_DBG_ECO_CNTL */
unsigned int prim_fifo_threshold;
/** @sqefw_name: Name of the SQE microcode file */
const char *sqefw_name;
/** @gmufw_name: Name of the GMU firmware file */
const char *gmufw_name;
/** @zap_name: Name of the CPZ zap file */
const char *zap_name;
/** @hwcg: List of registers and values to write for HWCG */
const struct kgsl_regmap_list *hwcg;
/** @hwcg_count: Number of registers in @hwcg */
u32 hwcg_count;
/** @vbif: List of registers and values to write for VBIF */
const struct kgsl_regmap_list *vbif;
/** @vbif_count: Number of registers in @vbif */
u32 vbif_count;
/** @veto_fal10: veto status for fal10 feature */
bool veto_fal10;
/** @pdc_in_aop: True if PDC programmed in AOP */
bool pdc_in_aop;
/** @hang_detect_cycles: Hang detect counter timeout value */
u32 hang_detect_cycles;
/** @protected_regs: Array of protected registers for the target */
const struct adreno_protected_regs *protected_regs;
/** @disable_tseskip: True if TSESkip logic is disabled */
bool disable_tseskip;
/** @gx_cpr_toggle: True to toggle GX CPR FSM to avoid CPR stalls */
bool gx_cpr_toggle;
/** @highest_bank_bit: The bit of the highest DDR bank */
u32 highest_bank_bit;
/** @ctxt_record_size: Size of the preemption record in bytes */
u64 ctxt_record_size;
/** @gmu_hub_clk_freq: Gmu hub interface clock frequency */
u64 gmu_hub_clk_freq;
};
#define SPTPRAC_POWERON_CTRL_MASK 0x00778000
#define SPTPRAC_POWEROFF_CTRL_MASK 0x00778001
#define SPTPRAC_POWEROFF_STATUS_MASK BIT(2)
#define SPTPRAC_POWERON_STATUS_MASK BIT(3)
#define A6XX_RETAIN_FF_ENABLE_ENABLE_MASK BIT(11)
#define CP_CLUSTER_FE 0x0
#define CP_CLUSTER_SP_VS 0x1
#define CP_CLUSTER_PC_VS 0x2
#define CP_CLUSTER_GRAS 0x3
#define CP_CLUSTER_SP_PS 0x4
#define CP_CLUSTER_PS 0x5
#define CP_CLUSTER_VPC_PS 0x6
/**
* struct a6xx_cp_preemption_record - CP context record for
* preemption.
* @magic: (00) Value at this offset must be equal to
* A6XX_CP_CTXRECORD_MAGIC_REF.
* @info: (04) Type of record. Written non-zero (usually) by the CP.
* We must set this to zero for all ringbuffers.
* @errno: (08) Error code. Initialize this to A6XX_CP_CTXRECORD_ERROR_NONE.
* CP will update to another value if a preemption error occurs.
* @data: (12) DATA field in YIELD and SET_MARKER packets.
* Written by CP when switching out. Not used on switch-in. Initialized to 0.
* @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
* @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
* @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
* @_pad28: (28) Reserved/padding.
* @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
* @rbase: (40) RB_BASE_LO|HI saved and restored.
* @counter: (48) Pointer to preemption counter.
*/
struct a6xx_cp_preemption_record {
uint32_t magic;
uint32_t info;
uint32_t errno;
uint32_t data;
uint32_t cntl;
uint32_t rptr;
uint32_t wptr;
uint32_t _pad28;
uint64_t rptr_addr;
uint64_t rbase;
uint64_t counter;
};
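/*
 * A minimal sketch (not part of the driver) of how the byte offsets
 * documented above could be checked against this layout at compile time,
 * e.g. from any init path:
 *
 *	BUILD_BUG_ON(offsetof(struct a6xx_cp_preemption_record, info) != 4);
 *	BUILD_BUG_ON(offsetof(struct a6xx_cp_preemption_record, cntl) != 16);
 *	BUILD_BUG_ON(offsetof(struct a6xx_cp_preemption_record, rptr_addr) != 32);
 *	BUILD_BUG_ON(offsetof(struct a6xx_cp_preemption_record, rbase) != 40);
 *	BUILD_BUG_ON(offsetof(struct a6xx_cp_preemption_record, counter) != 48);
 */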
/**
* struct a6xx_cp_smmu_info - CP preemption SMMU info.
* @magic: (00) The value at this offset must be equal to
* A6XX_CP_SMMU_INFO_MAGIC_REF.
* @_pad4: (04) Reserved/padding
* @ttbr0: (08) Base address of the page table for the
* incoming context.
* @asid: (16) Address Space Identifier (ASID) of the incoming context.
* @context_idr: (20) Context Identification Register value.
*/
struct a6xx_cp_smmu_info {
uint32_t magic;
uint32_t _pad4;
uint64_t ttbr0;
uint32_t asid;
uint32_t context_idr;
};
#define A6XX_CP_SMMU_INFO_MAGIC_REF 0x241350D5UL
#define A6XX_CP_CTXRECORD_MAGIC_REF 0xAE399D6EUL
/* Size of each CP preemption record */
#define A6XX_CP_CTXRECORD_SIZE_IN_BYTES (2112 * 1024)
/* Size of the user context record block (in bytes) */
#define A6XX_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024)
/* Size of the performance counter save/restore block (in bytes) */
#define A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024)
#define A6XX_CP_RB_CNTL_DEFAULT (((ilog2(4) << 8) & 0x1F00) | \
(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
/* Size of the CP_INIT pm4 stream in dwords */
#define A6XX_CP_INIT_DWORDS 11
#define A6XX_INT_MASK \
((1 << A6XX_INT_CP_AHB_ERROR) | \
(1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) | \
(1 << A6XX_INT_RBBM_GPC_ERROR) | \
(1 << A6XX_INT_CP_SW) | \
(1 << A6XX_INT_CP_HW_ERROR) | \
(1 << A6XX_INT_CP_IB2) | \
(1 << A6XX_INT_CP_IB1) | \
(1 << A6XX_INT_CP_RB) | \
(1 << A6XX_INT_CP_CACHE_FLUSH_TS) | \
(1 << A6XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
(1 << A6XX_INT_RBBM_HANG_DETECT) | \
(1 << A6XX_INT_UCHE_OOB_ACCESS) | \
(1 << A6XX_INT_UCHE_TRAP_INTR) | \
(1 << A6XX_INT_TSB_WRITE_ERROR))
#define A6XX_HWSCHED_INT_MASK \
((1 << A6XX_INT_CP_AHB_ERROR) | \
(1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) | \
(1 << A6XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
(1 << A6XX_INT_UCHE_OOB_ACCESS) | \
(1 << A6XX_INT_UCHE_TRAP_INTR) | \
(1 << A6XX_INT_TSB_WRITE_ERROR))
/**
* to_a6xx_core - return the a6xx specific GPU core struct
* @adreno_dev: An Adreno GPU device handle
*
* Returns:
* A pointer to the a6xx specific GPU core struct
*/
static inline const struct adreno_a6xx_core *
to_a6xx_core(struct adreno_device *adreno_dev)
{
const struct adreno_gpu_core *core = adreno_dev->gpucore;
return container_of(core, struct adreno_a6xx_core, base);
}
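/*
 * Illustrative usage (not taken from the driver): target code typically pulls
 * per-core tunables through this helper, e.g.
 *
 *	const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
 *	u32 hbb = a6xx_core->highest_bank_bit;
 */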
/* Preemption functions */
void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic);
void a6xx_preemption_schedule(struct adreno_device *adreno_dev);
void a6xx_preemption_start(struct adreno_device *adreno_dev);
int a6xx_preemption_init(struct adreno_device *adreno_dev);
/**
* a6xx_preemption_post_ibsubmit - Insert commands following a submission
* @adreno_dev: Adreno GPU handle
* @cmds: Pointer to the ringbuffer to insert opcodes
*
* Return: The number of dwords written to @cmds
*/
u32 a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev, u32 *cmds);
/**
* a6xx_preemption_pre_ibsubmit - Insert opcodes before a submission
* @adreno_dev: Adreno GPU handle
* @rb: The ringbuffer being written
* @drawctxt: The draw context being written
* @cmds: Pointer to the ringbuffer to insert opcodes
*
* Return: The number of dwords written to @cmds
*/
u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds);
unsigned int a6xx_set_marker(unsigned int *cmds,
enum adreno_cp_marker_type type);
void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit);
int a6xx_preemption_context_init(struct kgsl_context *context);
void a6xx_preemption_context_destroy(struct kgsl_context *context);
void a6xx_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
void a6xx_crashdump_init(struct adreno_device *adreno_dev);
int a6xx_gmu_sptprac_enable(struct adreno_device *adreno_dev);
void a6xx_gmu_sptprac_disable(struct adreno_device *adreno_dev);
bool a6xx_gmu_sptprac_is_on(struct adreno_device *adreno_dev);
bool a619_holi_gx_is_on(struct adreno_device *adreno_dev);
/**
* a6xx_read_alwayson - Read the current always on clock value
* @adreno_dev: An Adreno GPU handle
*
* Return: The current value of the GMU always on counter
*/
u64 a6xx_read_alwayson(struct adreno_device *adreno_dev);
/**
* a6xx_start - Program a6xx registers
* @adreno_dev: An Adreno GPU handle
*
* This function does all a6xx register programming every
* time we boot the gpu
*/
void a6xx_start(struct adreno_device *adreno_dev);
/**
* a6xx_init - Initialize a6xx resources
* @adreno_dev: An Adreno GPU handle
*
* This function does a6xx specific one time initialization
* and is invoked when the very first client opens a
* kgsl instance
*
* Return: Zero on success and negative error on failure
*/
int a6xx_init(struct adreno_device *adreno_dev);
/**
* a6xx_rb_start - A6xx specific ringbuffer setup
* @adreno_dev: An Adreno GPU handle
*
* This function does a6xx specific ringbuffer setup and
* attempts to submit CP INIT and bring GPU out of secure mode
*
* Return: Zero on success and negative error on failure
*/
int a6xx_rb_start(struct adreno_device *adreno_dev);
/**
* a6xx_microcode_read - Get the cp microcode from the filesystem
* @adreno_dev: An Adreno GPU handle
*
* This function gets the firmware from the filesystem and sets up
* the microcode global buffer
*
* Return: Zero on success and negative error on failure
*/
int a6xx_microcode_read(struct adreno_device *adreno_dev);
/**
* a6xx_probe_common - Probe common a6xx resources
* @pdev: Pointer to the platform device
* @adreno_dev: Pointer to the adreno device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore structure
*
* This function sets up the a6xx resources common across all
* a6xx targets
*/
int a6xx_probe_common(struct platform_device *pdev,
struct adreno_device *adreno_dev, u32 chipid,
const struct adreno_gpu_core *gpucore);
/**
* a6xx_hw_isidle - Check whether a6xx gpu is idle or not
* @adreno_dev: An Adreno GPU handle
*
* Return: True if gpu is idle, otherwise false
*/
bool a6xx_hw_isidle(struct adreno_device *adreno_dev);
/**
* a6xx_spin_idle_debug - Debug logging used when gpu fails to idle
* @adreno_dev: An Adreno GPU handle
*
* This function logs interesting registers and triggers a snapshot
*/
void a6xx_spin_idle_debug(struct adreno_device *adreno_dev,
const char *str);
/**
* a6xx_perfcounter_update - Update the IFPC perfcounter list
* @adreno_dev: An Adreno GPU handle
* @reg: Perfcounter reg struct to add/remove to the list
* @update_reg: true if the perfcounter needs to be programmed by the CPU
*
* Return: 0 on success or -EBUSY if the lock couldn't be taken
*/
int a6xx_perfcounter_update(struct adreno_device *adreno_dev,
struct adreno_perfcount_register *reg, bool update_reg);
/**
* a6xx_ringbuffer_init - Initialize the ringbuffers
* @adreno_dev: An Adreno GPU handle
*
* Initialize the ringbuffer(s) for a6xx.
* Return: 0 on success or negative on failure
*/
int a6xx_ringbuffer_init(struct adreno_device *adreno_dev);
extern const struct adreno_perfcounters adreno_a630_perfcounters;
extern const struct adreno_perfcounters adreno_a6xx_perfcounters;
extern const struct adreno_perfcounters adreno_a6xx_legacy_perfcounters;
extern const struct adreno_perfcounters adreno_a6xx_hwsched_perfcounters;
/**
* a6xx_rdpm_mx_freq_update - Update the mx frequency
* @gmu: An Adreno GMU handle
* @freq: Frequency in KHz
*
* This function communicates GPU mx frequency changes to rdpm.
*/
void a6xx_rdpm_mx_freq_update(struct a6xx_gmu_device *gmu, u32 freq);
/**
* a6xx_rdpm_cx_freq_update - Update the cx frequency
* @gmu: An Adreno GMU handle
* @freq: Frequency in KHz
*
* This function communicates GPU cx frequency changes to rdpm.
*/
void a6xx_rdpm_cx_freq_update(struct a6xx_gmu_device *gmu, u32 freq);
/**
* a6xx_ringbuffer_addcmds - Submit a command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @rb: Pointer to the ringbuffer to submit on
* @drawctxt: Pointer to the draw context for the submission, or NULL for
* internal submissions
* @flags: Flags for the submission
* @in: Commands to write to the ringbuffer
* @dwords: Size of @in (in dwords)
* @timestamp: Timestamp for the submission
* @time: Optional pointer to a submit time structure
*
* Submit a command to the ringbuffer.
* Return: 0 on success or negative on failure
*/
int a6xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time);
/**
* a6xx_ringbuffer_submitcmd - Submit a user command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @cmdobj: Pointer to a user command object
* @flags: Internal submit flags
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int a6xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time);
int a6xx_fenced_write(struct adreno_device *adreno_dev, u32 offset,
u32 value, u32 mask);
int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time, bool sync);
void a6xx_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds);
int a6xx_gmu_hfi_probe(struct adreno_device *adreno_dev);
static inline const struct a6xx_gpudev *
to_a6xx_gpudev(const struct adreno_gpudev *gpudev)
{
return container_of(gpudev, struct a6xx_gpudev, base);
}
/**
* a6xx_reset_preempt_records - Reset the preemption buffers
* @adreno_dev: Handle to the adreno device
*
* Reset the preemption records at the time of hard reset
*/
void a6xx_reset_preempt_records(struct adreno_device *adreno_dev);
/**
* a6xx_irq_pending - Check if there is any gpu irq pending
* @adreno_dev: Handle to the adreno device
*
* Return: true if there is any gpu irq pending
*/
bool a6xx_irq_pending(struct adreno_device *adreno_dev);
#ifdef CONFIG_QCOM_KGSL_CORESIGHT
void a6xx_coresight_init(struct adreno_device *device);
#else
static inline void a6xx_coresight_init(struct adreno_device *device) { }
#endif
#endif

View File

@ -0,0 +1,432 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_coresight.h"
static struct adreno_coresight_register a6xx_coresight_regs[] = {
{ A6XX_DBGC_CFG_DBGBUS_SEL_A },
{ A6XX_DBGC_CFG_DBGBUS_SEL_B },
{ A6XX_DBGC_CFG_DBGBUS_SEL_C },
{ A6XX_DBGC_CFG_DBGBUS_SEL_D },
{ A6XX_DBGC_CFG_DBGBUS_CNTLT },
{ A6XX_DBGC_CFG_DBGBUS_CNTLM },
{ A6XX_DBGC_CFG_DBGBUS_OPL },
{ A6XX_DBGC_CFG_DBGBUS_OPE },
{ A6XX_DBGC_CFG_DBGBUS_IVTL_0 },
{ A6XX_DBGC_CFG_DBGBUS_IVTL_1 },
{ A6XX_DBGC_CFG_DBGBUS_IVTL_2 },
{ A6XX_DBGC_CFG_DBGBUS_IVTL_3 },
{ A6XX_DBGC_CFG_DBGBUS_MASKL_0 },
{ A6XX_DBGC_CFG_DBGBUS_MASKL_1 },
{ A6XX_DBGC_CFG_DBGBUS_MASKL_2 },
{ A6XX_DBGC_CFG_DBGBUS_MASKL_3 },
{ A6XX_DBGC_CFG_DBGBUS_BYTEL_0 },
{ A6XX_DBGC_CFG_DBGBUS_BYTEL_1 },
{ A6XX_DBGC_CFG_DBGBUS_IVTE_0 },
{ A6XX_DBGC_CFG_DBGBUS_IVTE_1 },
{ A6XX_DBGC_CFG_DBGBUS_IVTE_2 },
{ A6XX_DBGC_CFG_DBGBUS_IVTE_3 },
{ A6XX_DBGC_CFG_DBGBUS_MASKE_0 },
{ A6XX_DBGC_CFG_DBGBUS_MASKE_1 },
{ A6XX_DBGC_CFG_DBGBUS_MASKE_2 },
{ A6XX_DBGC_CFG_DBGBUS_MASKE_3 },
{ A6XX_DBGC_CFG_DBGBUS_NIBBLEE },
{ A6XX_DBGC_CFG_DBGBUS_PTRC0 },
{ A6XX_DBGC_CFG_DBGBUS_PTRC1 },
{ A6XX_DBGC_CFG_DBGBUS_LOADREG },
{ A6XX_DBGC_CFG_DBGBUS_IDX },
{ A6XX_DBGC_CFG_DBGBUS_CLRC },
{ A6XX_DBGC_CFG_DBGBUS_LOADIVT },
{ A6XX_DBGC_VBIF_DBG_CNTL },
{ A6XX_DBGC_DBG_LO_HI_GPIO },
{ A6XX_DBGC_EXT_TRACE_BUS_CNTL },
{ A6XX_DBGC_READ_AHB_THROUGH_DBG },
{ A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1 },
{ A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2 },
{ A6XX_DBGC_EVT_CFG },
{ A6XX_DBGC_EVT_INTF_SEL_0 },
{ A6XX_DBGC_EVT_INTF_SEL_1 },
{ A6XX_DBGC_PERF_ATB_CFG },
{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_0 },
{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_1 },
{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_2 },
{ A6XX_DBGC_PERF_ATB_COUNTER_SEL_3 },
{ A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
{ A6XX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
{ A6XX_DBGC_PERF_ATB_DRAIN_CMD },
{ A6XX_DBGC_ECO_CNTL },
{ A6XX_DBGC_AHB_DBG_CNTL },
};
static struct adreno_coresight_register a6xx_coresight_regs_cx[] = {
{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_A },
{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_B },
{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_C },
{ A6XX_CX_DBGC_CFG_DBGBUS_SEL_D },
{ A6XX_CX_DBGC_CFG_DBGBUS_CNTLT },
{ A6XX_CX_DBGC_CFG_DBGBUS_CNTLM },
{ A6XX_CX_DBGC_CFG_DBGBUS_OPL },
{ A6XX_CX_DBGC_CFG_DBGBUS_OPE },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_0 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_2 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTL_3 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_0 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_2 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKL_3 },
{ A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_0 },
{ A6XX_CX_DBGC_CFG_DBGBUS_BYTEL_1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_0 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_2 },
{ A6XX_CX_DBGC_CFG_DBGBUS_IVTE_3 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_0 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_2 },
{ A6XX_CX_DBGC_CFG_DBGBUS_MASKE_3 },
{ A6XX_CX_DBGC_CFG_DBGBUS_NIBBLEE },
{ A6XX_CX_DBGC_CFG_DBGBUS_PTRC0 },
{ A6XX_CX_DBGC_CFG_DBGBUS_PTRC1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_LOADREG },
{ A6XX_CX_DBGC_CFG_DBGBUS_IDX },
{ A6XX_CX_DBGC_CFG_DBGBUS_CLRC },
{ A6XX_CX_DBGC_CFG_DBGBUS_LOADIVT },
{ A6XX_CX_DBGC_VBIF_DBG_CNTL },
{ A6XX_CX_DBGC_DBG_LO_HI_GPIO },
{ A6XX_CX_DBGC_EXT_TRACE_BUS_CNTL },
{ A6XX_CX_DBGC_READ_AHB_THROUGH_DBG },
{ A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 },
{ A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 },
{ A6XX_CX_DBGC_EVT_CFG },
{ A6XX_CX_DBGC_EVT_INTF_SEL_0 },
{ A6XX_CX_DBGC_EVT_INTF_SEL_1 },
{ A6XX_CX_DBGC_PERF_ATB_CFG },
{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_0 },
{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_1 },
{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_2 },
{ A6XX_CX_DBGC_PERF_ATB_COUNTER_SEL_3 },
{ A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
{ A6XX_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
{ A6XX_CX_DBGC_PERF_ATB_DRAIN_CMD },
{ A6XX_CX_DBGC_ECO_CNTL },
{ A6XX_CX_DBGC_AHB_DBG_CNTL },
};
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a6xx_coresight_regs[0]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a6xx_coresight_regs[1]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a6xx_coresight_regs[2]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a6xx_coresight_regs[3]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a6xx_coresight_regs[4]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a6xx_coresight_regs[5]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a6xx_coresight_regs[6]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a6xx_coresight_regs[7]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a6xx_coresight_regs[8]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a6xx_coresight_regs[9]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a6xx_coresight_regs[10]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a6xx_coresight_regs[11]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a6xx_coresight_regs[12]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a6xx_coresight_regs[13]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a6xx_coresight_regs[14]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a6xx_coresight_regs[15]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a6xx_coresight_regs[16]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a6xx_coresight_regs[17]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a6xx_coresight_regs[18]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a6xx_coresight_regs[19]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a6xx_coresight_regs[20]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a6xx_coresight_regs[21]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a6xx_coresight_regs[22]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a6xx_coresight_regs[23]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a6xx_coresight_regs[24]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a6xx_coresight_regs[25]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a6xx_coresight_regs[26]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a6xx_coresight_regs[27]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a6xx_coresight_regs[28]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a6xx_coresight_regs[29]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a6xx_coresight_regs[30]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a6xx_coresight_regs[31]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a6xx_coresight_regs[32]);
static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &a6xx_coresight_regs[33]);
static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a6xx_coresight_regs[34]);
static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a6xx_coresight_regs[35]);
static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &a6xx_coresight_regs[36]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &a6xx_coresight_regs[37]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &a6xx_coresight_regs[38]);
static ADRENO_CORESIGHT_ATTR(evt_cfg, &a6xx_coresight_regs[39]);
static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &a6xx_coresight_regs[40]);
static ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &a6xx_coresight_regs[41]);
static ADRENO_CORESIGHT_ATTR(perf_atb_cfg, &a6xx_coresight_regs[42]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_0, &a6xx_coresight_regs[43]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_1, &a6xx_coresight_regs[44]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_2, &a6xx_coresight_regs[45]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_3, &a6xx_coresight_regs[46]);
static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_0,
&a6xx_coresight_regs[47]);
static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_1,
&a6xx_coresight_regs[48]);
static ADRENO_CORESIGHT_ATTR(perf_atb_drain_cmd, &a6xx_coresight_regs[49]);
static ADRENO_CORESIGHT_ATTR(eco_cntl, &a6xx_coresight_regs[50]);
static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a6xx_coresight_regs[51]);
/*CX debug registers*/
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a,
&a6xx_coresight_regs_cx[0]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b,
&a6xx_coresight_regs_cx[1]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c,
&a6xx_coresight_regs_cx[2]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d,
&a6xx_coresight_regs_cx[3]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt,
&a6xx_coresight_regs_cx[4]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm,
&a6xx_coresight_regs_cx[5]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl,
&a6xx_coresight_regs_cx[6]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope,
&a6xx_coresight_regs_cx[7]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0,
&a6xx_coresight_regs_cx[8]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1,
&a6xx_coresight_regs_cx[9]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2,
&a6xx_coresight_regs_cx[10]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3,
&a6xx_coresight_regs_cx[11]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0,
&a6xx_coresight_regs_cx[12]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1,
&a6xx_coresight_regs_cx[13]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2,
&a6xx_coresight_regs_cx[14]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3,
&a6xx_coresight_regs_cx[15]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0,
&a6xx_coresight_regs_cx[16]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1,
&a6xx_coresight_regs_cx[17]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0,
&a6xx_coresight_regs_cx[18]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1,
&a6xx_coresight_regs_cx[19]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2,
&a6xx_coresight_regs_cx[20]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3,
&a6xx_coresight_regs_cx[21]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0,
&a6xx_coresight_regs_cx[22]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1,
&a6xx_coresight_regs_cx[23]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2,
&a6xx_coresight_regs_cx[24]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3,
&a6xx_coresight_regs_cx[25]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee,
&a6xx_coresight_regs_cx[26]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0,
&a6xx_coresight_regs_cx[27]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1,
&a6xx_coresight_regs_cx[28]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg,
&a6xx_coresight_regs_cx[29]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx,
&a6xx_coresight_regs_cx[30]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc,
&a6xx_coresight_regs_cx[31]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt,
&a6xx_coresight_regs_cx[32]);
static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl,
&a6xx_coresight_regs_cx[33]);
static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio,
&a6xx_coresight_regs_cx[34]);
static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl,
&a6xx_coresight_regs_cx[35]);
static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg,
&a6xx_coresight_regs_cx[36]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1,
&a6xx_coresight_regs_cx[37]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2,
&a6xx_coresight_regs_cx[38]);
static ADRENO_CORESIGHT_ATTR(cx_evt_cfg,
&a6xx_coresight_regs_cx[39]);
static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0,
&a6xx_coresight_regs_cx[40]);
static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1,
&a6xx_coresight_regs_cx[41]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg,
&a6xx_coresight_regs_cx[42]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_0,
&a6xx_coresight_regs_cx[43]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_1,
&a6xx_coresight_regs_cx[44]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_2,
&a6xx_coresight_regs_cx[45]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_3,
&a6xx_coresight_regs_cx[46]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_0,
&a6xx_coresight_regs_cx[47]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_1,
&a6xx_coresight_regs_cx[48]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_drain_cmd,
&a6xx_coresight_regs_cx[49]);
static ADRENO_CORESIGHT_ATTR(cx_eco_cntl,
&a6xx_coresight_regs_cx[50]);
static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl,
&a6xx_coresight_regs_cx[51]);
static struct attribute *a6xx_coresight_attrs[] = {
&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
&coresight_attr_cfg_dbgbus_opl.attr.attr,
&coresight_attr_cfg_dbgbus_ope.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
&coresight_attr_cfg_dbgbus_idx.attr.attr,
&coresight_attr_cfg_dbgbus_clrc.attr.attr,
&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
&coresight_attr_vbif_dbg_cntl.attr.attr,
&coresight_attr_dbg_lo_hi_gpio.attr.attr,
&coresight_attr_ext_trace_bus_cntl.attr.attr,
&coresight_attr_read_ahb_through_dbg.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
&coresight_attr_evt_cfg.attr.attr,
&coresight_attr_evt_intf_sel_0.attr.attr,
&coresight_attr_evt_intf_sel_1.attr.attr,
&coresight_attr_perf_atb_cfg.attr.attr,
&coresight_attr_perf_atb_counter_sel_0.attr.attr,
&coresight_attr_perf_atb_counter_sel_1.attr.attr,
&coresight_attr_perf_atb_counter_sel_2.attr.attr,
&coresight_attr_perf_atb_counter_sel_3.attr.attr,
&coresight_attr_perf_atb_trig_intf_sel_0.attr.attr,
&coresight_attr_perf_atb_trig_intf_sel_1.attr.attr,
&coresight_attr_perf_atb_drain_cmd.attr.attr,
&coresight_attr_eco_cntl.attr.attr,
&coresight_attr_ahb_dbg_cntl.attr.attr,
NULL,
};
/*cx*/
static struct attribute *a6xx_coresight_attrs_cx[] = {
&coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr,
&coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr,
&coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr,
&coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr,
&coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr,
&coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr,
&coresight_attr_cx_cfg_dbgbus_opl.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ope.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr,
&coresight_attr_cx_cfg_dbgbus_idx.attr.attr,
&coresight_attr_cx_cfg_dbgbus_clrc.attr.attr,
&coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr,
&coresight_attr_cx_vbif_dbg_cntl.attr.attr,
&coresight_attr_cx_dbg_lo_hi_gpio.attr.attr,
&coresight_attr_cx_ext_trace_bus_cntl.attr.attr,
&coresight_attr_cx_read_ahb_through_dbg.attr.attr,
&coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr,
&coresight_attr_cx_evt_cfg.attr.attr,
&coresight_attr_cx_evt_intf_sel_0.attr.attr,
&coresight_attr_cx_evt_intf_sel_1.attr.attr,
&coresight_attr_cx_perf_atb_cfg.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_0.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_1.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_2.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_3.attr.attr,
&coresight_attr_cx_perf_atb_trig_intf_sel_0.attr.attr,
&coresight_attr_cx_perf_atb_trig_intf_sel_1.attr.attr,
&coresight_attr_cx_perf_atb_drain_cmd.attr.attr,
&coresight_attr_cx_eco_cntl.attr.attr,
&coresight_attr_cx_ahb_dbg_cntl.attr.attr,
NULL,
};
static const struct attribute_group a6xx_coresight_group = {
.attrs = a6xx_coresight_attrs,
};
static const struct attribute_group *a6xx_coresight_groups[] = {
&a6xx_coresight_group,
NULL,
};
static const struct attribute_group a6xx_coresight_group_cx = {
.attrs = a6xx_coresight_attrs_cx,
};
static const struct attribute_group *a6xx_coresight_groups_cx[] = {
&a6xx_coresight_group_cx,
NULL,
};
static const struct adreno_coresight a6xx_coresight = {
.registers = a6xx_coresight_regs,
.count = ARRAY_SIZE(a6xx_coresight_regs),
.groups = a6xx_coresight_groups,
};
static const struct adreno_coresight a6xx_coresight_cx = {
.registers = a6xx_coresight_regs_cx,
.count = ARRAY_SIZE(a6xx_coresight_regs_cx),
.groups = a6xx_coresight_groups_cx,
};
void a6xx_coresight_init(struct adreno_device *adreno_dev)
{
adreno_coresight_add_device(adreno_dev, "coresight-gfx",
&a6xx_coresight, &adreno_dev->gx_coresight);
adreno_coresight_add_device(adreno_dev, "coresight-gfx-cx",
&a6xx_coresight_cx, &adreno_dev->cx_coresight);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,451 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_A6XX_GMU_H
#define __ADRENO_A6XX_GMU_H
#include <linux/mailbox_client.h>
#include "adreno_a6xx_hfi.h"
#include "kgsl_gmu_core.h"
/**
* struct a6xx_gmu_device - GMU device structure
* @ver: GMU Version information
* @irq: GMU interrupt number
* @fw_image: GMU FW image
* @hfi_mem: pointer to HFI shared memory
* @dump_mem: pointer to GMU debug dump memory
* @gmu_log: gmu event log memory
* @hfi: HFI controller
* @num_gpupwrlevels: number of GPU frequencies in the GPU freq table
* @num_bwlevel: number of GPU BW levels
* @num_cnocbwlevel: number of CNOC BW levels
* @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling
* @clks: GPU subsystem clocks required for GMU functionality
* @wakeup_pwrlevel: GPU wake up power/DCVS level in case different
* than default power level
* @idle_level: Minimal GPU idle power level
* @fault_count: GMU fault count
* @mailbox: Messages to AOP for ACD enable/disable go through this
* @log_wptr_retention: Store the log wptr offset on slumber
*/
struct a6xx_gmu_device {
struct {
u32 core;
u32 core_dev;
u32 pwr;
u32 pwr_dev;
u32 hfi;
} ver;
struct platform_device *pdev;
int irq;
const struct firmware *fw_image;
struct kgsl_memdesc *dump_mem;
struct kgsl_memdesc *gmu_log;
/** @vrb: GMU virtual register bank memory */
struct kgsl_memdesc *vrb;
/** @trace: gmu trace container */
struct kgsl_gmu_trace trace;
struct a6xx_hfi hfi;
struct clk_bulk_data *clks;
/** @num_clks: Number of entries in the @clks array */
int num_clks;
unsigned int idle_level;
/** @freqs: Array of GMU frequencies */
u32 freqs[GMU_MAX_PWRLEVELS];
/** @vlvls: Array of GMU voltage levels */
u32 vlvls[GMU_MAX_PWRLEVELS];
struct kgsl_mailbox mailbox;
bool preallocations;
/** @gmu_globals: Array to store gmu global buffers */
struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES];
/** @global_entries: To keep track of number of gmu buffers */
u32 global_entries;
struct gmu_vma_entry *vma;
unsigned int log_wptr_retention;
/** @cm3_fault: whether gmu received a cm3 fault interrupt */
atomic_t cm3_fault;
/**
* @itcm_shadow: Copy of the itcm block in firmware binary used for
* snapshot
*/
void *itcm_shadow;
/** @flags: Internal gmu flags */
unsigned long flags;
/** @rscc_virt: Pointer where RSCC block is mapped */
void __iomem *rscc_virt;
/** @domain: IOMMU domain for the kernel context */
struct iommu_domain *domain;
/** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */
void __iomem *rdpm_cx_virt;
/** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */
void __iomem *rdpm_mx_virt;
/** @log_stream_enable: GMU log streaming enable. Disabled by default */
bool log_stream_enable;
/** @log_group_mask: Allows overriding default GMU log group mask */
u32 log_group_mask;
struct kobject log_kobj;
/*
* @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at
* which GMU can run at higher frequency.
*/
u32 perf_ddr_bw;
/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
u32 num_oob_perfcntr;
/** @pdc_cfg_base: Base address of PDC cfg registers */
void __iomem *pdc_cfg_base;
/** @pdc_seq_base: Base address of PDC seq registers */
void __iomem *pdc_seq_base;
/** @stats_enable: GMU stats feature enable */
bool stats_enable;
/** @stats_mask: GMU performance countables to enable */
u32 stats_mask;
/** @stats_interval: GMU performance counters sampling interval */
u32 stats_interval;
/** @stats_kobj: kernel object for GMU stats directory in sysfs */
struct kobject stats_kobj;
};
/* Helper function to get to a6xx gmu device from adreno device */
struct a6xx_gmu_device *to_a6xx_gmu(struct adreno_device *adreno_dev);
/* Helper function to get to adreno device from a6xx gmu device */
struct adreno_device *a6xx_gmu_to_adreno(struct a6xx_gmu_device *gmu);
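/*
 * Illustrative sketch (not taken from the driver): code that needs GMU state
 * usually resolves the container with the helper above, e.g.
 *
 *	struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
 *
 *	dev_dbg(&gmu->pdev->dev, "idle level %u, %d clocks\n",
 *		gmu->idle_level, gmu->num_clks);
 */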
/**
* reserve_gmu_kernel_block() - Allocate a gmu buffer
* @gmu: Pointer to the a6xx gmu device
* @addr: Desired gmu virtual address
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @va_align: Alignment as a power of two (2^n) bytes for the GMU VA
*
* This function allocates a buffer and maps it in
* the desired gmu vma
*
* Return: Pointer to the memory descriptor or error pointer on failure
*/
struct kgsl_memdesc *reserve_gmu_kernel_block(struct a6xx_gmu_device *gmu,
u32 addr, u32 size, u32 vma_id, u32 va_align);
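/*
 * Illustrative usage (not taken from the driver): callers are expected to
 * check the returned descriptor with IS_ERR(), e.g.
 *
 *	struct kgsl_memdesc *md;
 *
 *	md = reserve_gmu_kernel_block(gmu, 0, SZ_4K, vma_id, 0);
 *	if (IS_ERR(md))
 *		return PTR_ERR(md);
 *
 * where vma_id is one of the gmu vma identifiers defined elsewhere in the
 * GMU code.
 */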
/**
* reserve_gmu_kernel_block_fixed() - Map a physical resource address into a gmu vma
* @gmu: Pointer to the a6xx gmu device
* @addr: Desired gmu virtual address
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @resource: Name of the resource to get the size and address to allocate
* @attrs: Attributes for the mapping
* @va_align: Alignment as a power of two (2^n) bytes for the GMU VA
*
* This function maps the physical resource address into the desired gmu vma
*
* Return: Pointer to the memory descriptor or error pointer on failure
*/
struct kgsl_memdesc *reserve_gmu_kernel_block_fixed(struct a6xx_gmu_device *gmu,
u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 va_align);
/**
* a6xx_build_rpmh_tables - Build the rpmh tables
* @adreno_dev: Pointer to the adreno device
*
* This function creates the gpu dcvs and bw tables
*
* Return: 0 on success and negative error on failure
*/
int a6xx_build_rpmh_tables(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_gx_is_on - Check if GX is on
* @adreno_dev: Pointer to the adreno device
*
* This function reads pwr status registers to check if GX
* is on or off
*/
bool a6xx_gmu_gx_is_on(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_device_snapshot - A6XX GMU snapshot function
* @device: Device being snapshotted
* @snapshot: Pointer to the snapshot instance
*
* This is where all of the A6XX GMU specific bits and pieces are grabbed
* into the snapshot memory
*/
void a6xx_gmu_device_snapshot(struct kgsl_device *device,
struct kgsl_snapshot *snapshot);
/**
* a6xx_gmu_device_probe - A6XX GMU probe function
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target specific probe function for gmu based a6xx targets.
*/
int a6xx_gmu_device_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* a6xx_gmu_reset - Reset and restart the gmu
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_reset(struct adreno_device *adreno_dev);
/**
* a6xx_enable_gpu_irq - Enable gpu interrupt
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_enable_gpu_irq(struct adreno_device *adreno_dev);
/**
* a6xx_disable_gpu_irq - Disable gpu interrupt
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_disable_gpu_irq(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_snapshot- Take snapshot for gmu targets
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot structure
*
* Send an NMI to gmu if we hit a gmu fault. Then take gmu
* snapshot and carry on with rest of the a6xx snapshot
*/
void a6xx_gmu_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* a6xx_gmu_probe - Probe a6xx gmu resources
* @device: Pointer to the kgsl device
* @pdev: Pointer to the gmu platform device
*
* Probe the gmu and hfi resources
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_probe(struct kgsl_device *device,
struct platform_device *pdev);
/**
* a6xx_gmu_parse_fw - Parse the gmu fw binary
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_parse_fw(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_memory_init - Allocate gmu memory
* @adreno_dev: Pointer to the adreno device
*
* Allocates the gmu log buffer and other buffers if needed.
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_memory_init(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_aop_send_acd_state - Enable or disable acd feature in aop
* @gmu: Pointer to the a6xx gmu device
* @flag: Boolean to enable or disable acd in aop
*
* This function enables or disables gpu acd feature using mailbox
*/
void a6xx_gmu_aop_send_acd_state(struct a6xx_gmu_device *gmu, bool flag);
/**
* a6xx_gmu_disable_gdsc - Disable gmu gdsc
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_gmu_disable_gdsc(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_load_fw - Load gmu firmware
* @adreno_dev: Pointer to the adreno device
*
* Loads the gmu firmware binary into TCMs and memory
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_load_fw(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_device_start - Bring gmu out of reset
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_device_start(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_hfi_start - Indicate hfi start to gmu
* @device: Pointer to the kgsl device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_hfi_start(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_itcm_shadow - Create itcm shadow copy for snapshot
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_itcm_shadow(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_register_config - gmu register configuration
* @adreno_dev: Pointer to the adreno device
*
* Program gmu registers based on features
*/
void a6xx_gmu_register_config(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_version_info - Get gmu firmware version
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_gmu_version_info(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_irq_enable - Enable gmu interrupts
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_gmu_irq_enable(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_irq_disable - Disable gmu interrupts
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_gmu_irq_disable(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_suspend - Hard reset the gpu and gmu
* @adreno_dev: Pointer to the adreno device
*
* In case we hit a gmu fault, hard reset the gpu and gmu
* to recover from the fault
*/
void a6xx_gmu_suspend(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_oob_set - send gmu oob request
* @device: Pointer to the kgsl device
* @oob: Type of oob request as defined in enum oob_request
*
* Request gmu to keep gpu powered up till the oob is cleared
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);
/**
* a6xx_gmu_oob_clear - clear an asserted oob request
* @device: Pointer to the kgsl device
* @oob: Type of oob request as defined in enum oob_request
*
* Clear a previously requested oob so that gmu can power
* collapse the gpu
*/
void a6xx_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);
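/*
* Illustrative usage sketch (not part of the driver): oob requests are meant
* to be paired, e.g. to keep the gpu powered across a register access. The
* oob_gpu value is assumed here to be one of the enum oob_request members.
*
*	int ret = a6xx_gmu_oob_set(device, oob_gpu);
*
*	if (ret)
*		return ret;
*	// ... access GX registers ...
*	a6xx_gmu_oob_clear(device, oob_gpu);
*/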
/**
* a6xx_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
* @adreno_dev: Pointer to the adreno device
*
* If ifpc is enabled, wait for gmu to put gpu into ifpc.
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_wait_for_idle - Wait for gmu to become idle
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_wait_for_idle(struct adreno_device *adreno_dev);
/**
* a6xx_rscc_sleep_sequence - Trigger rscc sleep sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_rscc_sleep_sequence(struct adreno_device *adreno_dev);
/**
* a6xx_rscc_wakeup_sequence - Trigger rscc wakeup sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_rscc_wakeup_sequence(struct adreno_device *adreno_dev);
/**
* a6xx_halt_gbif - Halt CX and GX requests in GBIF
* @adreno_dev: Pointer to the adreno device
*
* Clear any pending GX or CX transactions in GBIF and
* deassert GBIF halt
*
* Return: 0 on success or negative error on failure
*/
int a6xx_halt_gbif(struct adreno_device *adreno_dev);
/**
* a6xx_load_pdc_ucode - Load and enable pdc sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_load_pdc_ucode(struct adreno_device *adreno_dev);
/**
* a6xx_load_rsc_ucode - Load rscc sequence
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_load_rsc_ucode(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_remove - Clean up gmu probed resources
* @device: Pointer to the kgsl device
*/
void a6xx_gmu_remove(struct kgsl_device *device);
/**
* a6xx_gmu_enable_clks - Enable gmu clocks
* @adreno_dev: Pointer to the adreno device
* @level: GMU frequency level
*
* Return: 0 on success or negative error on failure
*/
int a6xx_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);
/**
* a6xx_gmu_handle_watchdog - Handle watchdog interrupt
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_gmu_handle_watchdog(struct adreno_device *adreno_dev);
/**
* a6xx_gmu_send_nmi - Send NMI to GMU
* @device: Pointer to the kgsl device
* @force: Boolean to forcefully send NMI irrespective of GMU state
*/
void a6xx_gmu_send_nmi(struct kgsl_device *device, bool force);
/**
* a6xx_gmu_add_to_minidump - Register a6xx_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int a6xx_gmu_add_to_minidump(struct adreno_device *adreno_dev);
#endif

View File

@ -0,0 +1,469 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "a6xx_reg.h"
#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_a6xx_gmu.h"
#include "adreno_snapshot.h"
#include "kgsl_device.h"
static const unsigned int a6xx_gmu_gx_registers[] = {
/* GMU GX */
0x1A800, 0x1A800, 0x1A810, 0x1A813, 0x1A816, 0x1A816, 0x1A818, 0x1A81B,
0x1A81E, 0x1A81E, 0x1A820, 0x1A823, 0x1A826, 0x1A826, 0x1A828, 0x1A82B,
0x1A82E, 0x1A82E, 0x1A830, 0x1A833, 0x1A836, 0x1A836, 0x1A838, 0x1A83B,
0x1A83E, 0x1A83E, 0x1A840, 0x1A843, 0x1A846, 0x1A846, 0x1A880, 0x1A884,
0x1A900, 0x1A92B, 0x1A940, 0x1A940,
};
static const unsigned int a6xx_gmu_tcm_registers[] = {
/* ITCM */
0x1B400, 0x1C3FF,
/* DTCM */
0x1C400, 0x1D3FF,
};
static const unsigned int a6xx_gmu_registers[] = {
/* GMU CX */
0x1F400, 0x1F407, 0x1F410, 0x1F412, 0x1F500, 0x1F500, 0x1F507, 0x1F50A,
0x1F800, 0x1F804, 0x1F807, 0x1F808, 0x1F80B, 0x1F80C, 0x1F80F, 0x1F81C,
0x1F824, 0x1F82A, 0x1F82D, 0x1F830, 0x1F840, 0x1F853, 0x1F887, 0x1F889,
0x1F8A0, 0x1F8A2, 0x1F8A4, 0x1F8AF, 0x1F8C0, 0x1F8C3, 0x1F8D0, 0x1F8D0,
0x1F8E4, 0x1F8E4, 0x1F8E8, 0x1F8EC, 0x1F900, 0x1F903, 0x1F940, 0x1F940,
0x1F942, 0x1F944, 0x1F94C, 0x1F94D, 0x1F94F, 0x1F951, 0x1F954, 0x1F954,
0x1F957, 0x1F958, 0x1F95D, 0x1F95D, 0x1F962, 0x1F962, 0x1F964, 0x1F965,
0x1F980, 0x1F986, 0x1F990, 0x1F99E, 0x1F9C0, 0x1F9C0, 0x1F9C5, 0x1F9CC,
0x1F9E0, 0x1F9E2, 0x1F9F0, 0x1F9F0, 0x1FA00, 0x1FA01,
/* GMU AO */
0x23B00, 0x23B16,
};
static const unsigned int a660_gmu_registers[] = {
/* GMU CX */
0x1F408, 0x1F40D, 0x1F40F, 0x1F40F, 0x1F50B, 0x1F50B, 0x1F860, 0x1F860,
0x1F870, 0x1F877, 0x1F8C4, 0x1F8C4, 0x1F8F0, 0x1F8F1, 0x1F948, 0x1F94A,
0x1F966, 0x1F96B, 0x1F970, 0x1F970, 0x1F972, 0x1F979, 0x1F9CD, 0x1F9D4,
0x1FA02, 0x1FA03, 0x20000, 0x20001, 0x20004, 0x20004, 0x20008, 0x20012,
0x20018, 0x20018,
/* GMU AO LPAC */
0x23B30, 0x23B30,
};
static const unsigned int a6xx_gmu_gpucc_registers[] = {
/* GPU CC */
0x24000, 0x24012, 0x24040, 0x24052, 0x24400, 0x24404, 0x24407, 0x2440B,
0x24415, 0x2441C, 0x2441E, 0x2442D, 0x2443C, 0x2443D, 0x2443F, 0x24440,
0x24442, 0x24449, 0x24458, 0x2445A, 0x24540, 0x2455E, 0x24800, 0x24802,
0x24C00, 0x24C02, 0x25400, 0x25402, 0x25800, 0x25802, 0x25C00, 0x25C02,
0x26000, 0x26002,
/* GPU CC ACD */
0x26400, 0x26416, 0x26420, 0x26427,
};
static const unsigned int a662_gmu_gpucc_registers[] = {
/* GPU CC */
0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff,
0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405,
0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455,
0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e,
0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8,
0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e,
0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d,
};
static const unsigned int a663_gmu_gpucc_registers[] = {
/* GPU CC */
0x24000, 0x2400e, 0x24400, 0x2440e, 0x25800, 0x25804, 0x25c00, 0x25c04,
0x26000, 0x26004, 0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430,
0x26432, 0x26432, 0x26441, 0x26455, 0x26466, 0x26468, 0x26478, 0x2647a,
0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a3, 0x264b3, 0x264b5,
0x264c5, 0x264c7, 0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc,
0x2650b, 0x2650c, 0x2651c, 0x2651e, 0x26540, 0x26570, 0x26600, 0x26616,
0x26620, 0x2662d,
};
static const unsigned int a630_rscc_snapshot_registers[] = {
0x23400, 0x23434, 0x23436, 0x23436, 0x23480, 0x23484, 0x23489, 0x2348C,
0x23491, 0x23494, 0x23499, 0x2349C, 0x234A1, 0x234A4, 0x234A9, 0x234AC,
0x23500, 0x23502, 0x23504, 0x23507, 0x23514, 0x23519, 0x23524, 0x2352B,
0x23580, 0x23597, 0x23740, 0x23741, 0x23744, 0x23747, 0x2374C, 0x23787,
0x237EC, 0x237EF, 0x237F4, 0x2382F, 0x23894, 0x23897, 0x2389C, 0x238D7,
0x2393C, 0x2393F, 0x23944, 0x2397F,
};
static const unsigned int a6xx_rscc_snapshot_registers[] = {
0x23400, 0x23434, 0x23436, 0x23436, 0x23440, 0x23440, 0x23480, 0x23484,
0x23489, 0x2348C, 0x23491, 0x23494, 0x23499, 0x2349C, 0x234A1, 0x234A4,
0x234A9, 0x234AC, 0x23500, 0x23502, 0x23504, 0x23507, 0x23514, 0x23519,
0x23524, 0x2352B, 0x23580, 0x23597, 0x23740, 0x23741, 0x23744, 0x23747,
0x2374C, 0x23787, 0x237EC, 0x237EF, 0x237F4, 0x2382F, 0x23894, 0x23897,
0x2389C, 0x238D7, 0x2393C, 0x2393F, 0x23944, 0x2397F,
};
static const unsigned int a650_rscc_registers[] = {
0x38000, 0x38034, 0x38036, 0x38036, 0x38040, 0x38042, 0x38080, 0x38084,
0x38089, 0x3808C, 0x38091, 0x38094, 0x38099, 0x3809C, 0x380A1, 0x380A4,
0x380A9, 0x380AC, 0x38100, 0x38102, 0x38104, 0x38107, 0x38114, 0x38119,
0x38124, 0x3812E, 0x38180, 0x38197, 0x38340, 0x38341, 0x38344, 0x38347,
0x3834C, 0x3834F, 0x38351, 0x38354, 0x38356, 0x38359, 0x3835B, 0x3835E,
0x38360, 0x38363, 0x38365, 0x38368, 0x3836A, 0x3836D, 0x3836F, 0x38372,
0x383EC, 0x383EF, 0x383F4, 0x383F7, 0x383F9, 0x383FC, 0x383FE, 0x38401,
0x38403, 0x38406, 0x38408, 0x3840B, 0x3840D, 0x38410, 0x38412, 0x38415,
0x38417, 0x3841A, 0x38494, 0x38497, 0x3849C, 0x3849F, 0x384A1, 0x384A4,
0x384A6, 0x384A9, 0x384AB, 0x384AE, 0x384B0, 0x384B3, 0x384B5, 0x384B8,
0x384BA, 0x384BD, 0x384BF, 0x384C2, 0x3853C, 0x3853F, 0x38544, 0x38547,
0x38549, 0x3854C, 0x3854E, 0x38551, 0x38553, 0x38556, 0x38558, 0x3855B,
0x3855D, 0x38560, 0x38562, 0x38565, 0x38567, 0x3856A, 0x385E4, 0x385E7,
0x385EC, 0x385EF, 0x385F1, 0x385F4, 0x385F6, 0x385F9, 0x385FB, 0x385FE,
0x38600, 0x38603, 0x38605, 0x38608, 0x3860A, 0x3860D, 0x3860F, 0x38612,
0x3868C, 0x3868F, 0x38694, 0x38697, 0x38699, 0x3869C, 0x3869E, 0x386A1,
0x386A3, 0x386A6, 0x386A8, 0x386AB, 0x386AD, 0x386B0, 0x386B2, 0x386B5,
0x386B7, 0x386BA, 0x38734, 0x38737, 0x3873C, 0x3873F, 0x38741, 0x38744,
0x38746, 0x38749, 0x3874B, 0x3874E, 0x38750, 0x38753, 0x38755, 0x38758,
0x3875A, 0x3875D, 0x3875F, 0x38762, 0x387DC, 0x387DF, 0x387E4, 0x387E7,
0x387E9, 0x387EC, 0x387EE, 0x387F1, 0x387F3, 0x387F6, 0x387F8, 0x387FB,
0x387FD, 0x38800, 0x38802, 0x38805, 0x38807, 0x3880A, 0x38884, 0x38887,
0x3888C, 0x3888F, 0x38891, 0x38894, 0x38896, 0x38899, 0x3889B, 0x3889E,
0x388A0, 0x388A3, 0x388A5, 0x388A8, 0x388AA, 0x388AD, 0x388AF, 0x388B2,
0x3892C, 0x3892F, 0x38934, 0x38937, 0x38939, 0x3893C, 0x3893E, 0x38941,
0x38943, 0x38946, 0x38948, 0x3894B, 0x3894D, 0x38950, 0x38952, 0x38955,
0x38957, 0x3895A, 0x38B50, 0x38B51, 0x38B53, 0x38B55, 0x38B5A, 0x38B5A,
0x38B5F, 0x38B5F, 0x38B64, 0x38B64, 0x38B69, 0x38B69, 0x38B6E, 0x38B6E,
0x38B73, 0x38B73, 0x38BF8, 0x38BF8, 0x38BFD, 0x38BFD, 0x38C02, 0x38C02,
0x38C07, 0x38C07, 0x38C0C, 0x38C0C, 0x38C11, 0x38C11, 0x38C16, 0x38C16,
0x38C1B, 0x38C1B, 0x38CA0, 0x38CA0, 0x38CA5, 0x38CA5, 0x38CAA, 0x38CAA,
0x38CAF, 0x38CAF, 0x38CB4, 0x38CB4, 0x38CB9, 0x38CB9, 0x38CBE, 0x38CBE,
0x38CC3, 0x38CC3, 0x38D48, 0x38D48, 0x38D4D, 0x38D4D, 0x38D52, 0x38D52,
0x38D57, 0x38D57, 0x38D5C, 0x38D5C, 0x38D61, 0x38D61, 0x38D66, 0x38D66,
0x38D6B, 0x38D6B, 0x38DF0, 0x38DF0, 0x38DF5, 0x38DF5, 0x38DFA, 0x38DFA,
0x38DFF, 0x38DFF, 0x38E04, 0x38E04, 0x38E09, 0x38E09, 0x38E0E, 0x38E0E,
0x38E13, 0x38E13, 0x38E98, 0x38E98, 0x38E9D, 0x38E9D, 0x38EA2, 0x38EA2,
0x38EA7, 0x38EA7, 0x38EAC, 0x38EAC, 0x38EB1, 0x38EB1, 0x38EB6, 0x38EB6,
0x38EBB, 0x38EBB, 0x38F40, 0x38F40, 0x38F45, 0x38F45, 0x38F4A, 0x38F4A,
0x38F4F, 0x38F4F, 0x38F54, 0x38F54, 0x38F59, 0x38F59, 0x38F5E, 0x38F5E,
0x38F63, 0x38F63, 0x38FE8, 0x38FE8, 0x38FED, 0x38FED, 0x38FF2, 0x38FF2,
0x38FF7, 0x38FF7, 0x38FFC, 0x38FFC, 0x39001, 0x39001, 0x39006, 0x39006,
0x3900B, 0x3900B, 0x39090, 0x39090, 0x39095, 0x39095, 0x3909A, 0x3909A,
0x3909F, 0x3909F, 0x390A4, 0x390A4, 0x390A9, 0x390A9, 0x390AE, 0x390AE,
0x390B3, 0x390B3, 0x39138, 0x39138, 0x3913D, 0x3913D, 0x39142, 0x39142,
0x39147, 0x39147, 0x3914C, 0x3914C, 0x39151, 0x39151, 0x39156, 0x39156,
0x3915B, 0x3915B,
};
static size_t a6xx_snapshot_gmu_mem(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
unsigned int *data = (unsigned int *)
(buf + sizeof(*mem_hdr));
struct gmu_mem_type_desc *desc = priv;
if (priv == NULL || desc->memdesc->hostptr == NULL)
return 0;
if (remain < desc->memdesc->size + sizeof(*mem_hdr)) {
dev_err(device->dev,
"snapshot: Not enough memory for the gmu section %d\n",
desc->type);
return 0;
}
memset(mem_hdr, 0, sizeof(*mem_hdr));
mem_hdr->type = desc->type;
mem_hdr->hostaddr = (uintptr_t)desc->memdesc->hostptr;
mem_hdr->gmuaddr = desc->memdesc->gmuaddr;
mem_hdr->gpuaddr = 0;
/* Just copy the ringbuffer, there are no active IBs */
memcpy(data, desc->memdesc->hostptr, desc->memdesc->size);
return desc->memdesc->size + sizeof(*mem_hdr);
}
static size_t a6xx_gmu_snapshot_dtcm(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
struct a6xx_gmu_device *gmu = (struct a6xx_gmu_device *)priv;
u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
u32 i;
if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) {
SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory");
return 0;
}
mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
mem_hdr->hostaddr = 0;
mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start;
mem_hdr->gpuaddr = 0;
/* FIXME: use a bulk read? */
for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
gmu_core_regread(device, A6XX_GMU_CM3_DTCM_START + i, data++);
return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr);
}
static size_t a6xx_gmu_snapshot_itcm(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
void *dest = buf + sizeof(*mem_hdr);
struct a6xx_gmu_device *gmu = (struct a6xx_gmu_device *)priv;
if (!gmu->itcm_shadow) {
dev_err(&gmu->pdev->dev, "ITCM not captured\n");
return 0;
}
if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) {
SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory");
return 0;
}
mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
mem_hdr->hostaddr = 0;
mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start;
mem_hdr->gpuaddr = 0;
memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size);
return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr);
}
static void a6xx_gmu_snapshot_memories(struct kgsl_device *device,
struct a6xx_gmu_device *gmu, struct kgsl_snapshot *snapshot)
{
struct gmu_mem_type_desc desc;
struct kgsl_memdesc *md;
int i;
for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) {
md = &gmu->gmu_globals[i];
if (!md->size)
continue;
desc.memdesc = md;
if (md == gmu->hfi.hfi_mem)
desc.type = SNAPSHOT_GMU_MEM_HFI;
else if (md == gmu->gmu_log)
desc.type = SNAPSHOT_GMU_MEM_LOG;
else if (md == gmu->dump_mem)
desc.type = SNAPSHOT_GMU_MEM_DEBUG;
else if (md == gmu->vrb)
desc.type = SNAPSHOT_GMU_MEM_VRB;
else if (md == gmu->trace.md)
desc.type = SNAPSHOT_GMU_MEM_TRACE;
else
desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, a6xx_snapshot_gmu_mem, &desc);
}
}
struct kgsl_snapshot_gmu_version {
uint32_t type;
uint32_t value;
};
static size_t a6xx_snapshot_gmu_version(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
uint32_t *data = (uint32_t *) (buf + sizeof(*header));
struct kgsl_snapshot_gmu_version *ver = priv;
if (remain < DEBUG_SECTION_SZ(1)) {
SNAPSHOT_ERR_NOMEM(device, "GMU Version");
return 0;
}
header->type = ver->type;
header->size = 1;
*data = ver->value;
return DEBUG_SECTION_SZ(1);
}
static void a6xx_gmu_snapshot_versions(struct kgsl_device *device,
struct a6xx_gmu_device *gmu,
struct kgsl_snapshot *snapshot)
{
int i;
struct kgsl_snapshot_gmu_version gmu_vers[] = {
{ .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION,
.value = gmu->ver.core, },
{ .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION,
.value = gmu->ver.core_dev, },
{ .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION,
.value = gmu->ver.pwr, },
{ .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION,
.value = gmu->ver.pwr_dev, },
{ .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION,
.value = gmu->ver.hfi, },
};
for (i = 0; i < ARRAY_SIZE(gmu_vers); i++)
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, a6xx_snapshot_gmu_version,
&gmu_vers[i]);
}
#define RSCC_OFFSET_DWORDS 0x38000
static size_t a6xx_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
struct kgsl_snapshot_registers *regs = priv;
unsigned int *data = (unsigned int *)(buf + sizeof(*header));
int count = 0, j, k;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
/* Figure out how many registers we are going to dump */
for (j = 0; j < regs->count; j++) {
int start = regs->regs[j * 2];
int end = regs->regs[j * 2 + 1];
count += (end - start + 1);
}
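/* Each dumped register takes 8 bytes: one dword for the address, one for the value */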
if (remain < (count * 8) + sizeof(*header)) {
SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS");
return 0;
}
for (j = 0; j < regs->count; j++) {
unsigned int start = regs->regs[j * 2];
unsigned int end = regs->regs[j * 2 + 1];
for (k = start; k <= end; k++) {
unsigned int val;
val = __raw_readl(gmu->rscc_virt +
((k - RSCC_OFFSET_DWORDS) << 2));
*data++ = k;
*data++ = val;
}
}
header->count = count;
/* Return the size of the section */
return (count * 8) + sizeof(*header);
}
static void snapshot_rscc_registers(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/* RSCC registers are on cx */
if (adreno_is_a650_family(adreno_dev)) {
struct kgsl_snapshot_registers r;
r.regs = a650_rscc_registers;
r.count = ARRAY_SIZE(a650_rscc_registers) / 2;
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
snapshot, a6xx_snapshot_rscc_registers, &r);
} else if (adreno_is_a615_family(adreno_dev) ||
adreno_is_a630(adreno_dev)) {
adreno_snapshot_registers(device, snapshot,
a630_rscc_snapshot_registers,
ARRAY_SIZE(a630_rscc_snapshot_registers) / 2);
} else if (adreno_is_a640(adreno_dev) || adreno_is_a680(adreno_dev)) {
adreno_snapshot_registers(device, snapshot,
a6xx_rscc_snapshot_registers,
ARRAY_SIZE(a6xx_rscc_snapshot_registers) / 2);
}
}
/*
* a6xx_gmu_device_snapshot() - A6XX GMU snapshot function
* @device: Device being snapshotted
* @snapshot: Pointer to the snapshot instance
*
* This is where all of the A6XX GMU specific bits and pieces are grabbed
* into the snapshot memory
*/
void a6xx_gmu_device_snapshot(struct kgsl_device *device,
struct kgsl_snapshot *snapshot)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, a6xx_gmu_snapshot_itcm, gmu);
a6xx_gmu_snapshot_versions(device, gmu, snapshot);
a6xx_gmu_snapshot_memories(device, gmu, snapshot);
/* Snapshot tcms as registers for legacy targets */
if (adreno_is_a630(adreno_dev) ||
adreno_is_a615_family(adreno_dev))
adreno_snapshot_registers(device, snapshot,
a6xx_gmu_tcm_registers,
ARRAY_SIZE(a6xx_gmu_tcm_registers) / 2);
adreno_snapshot_registers(device, snapshot, a6xx_gmu_registers,
ARRAY_SIZE(a6xx_gmu_registers) / 2);
if (adreno_is_a662(adreno_dev) || adreno_is_a621(adreno_dev))
adreno_snapshot_registers(device, snapshot,
a662_gmu_gpucc_registers,
ARRAY_SIZE(a662_gmu_gpucc_registers) / 2);
else if (adreno_is_a663(adreno_dev))
adreno_snapshot_registers(device, snapshot,
a663_gmu_gpucc_registers,
ARRAY_SIZE(a663_gmu_gpucc_registers) / 2);
else
adreno_snapshot_registers(device, snapshot,
a6xx_gmu_gpucc_registers,
ARRAY_SIZE(a6xx_gmu_gpucc_registers) / 2);
/* Snapshot A660 specific GMU registers */
if (adreno_is_a660(adreno_dev))
adreno_snapshot_registers(device, snapshot, a660_gmu_registers,
ARRAY_SIZE(a660_gmu_registers) / 2);
snapshot_rscc_registers(adreno_dev, snapshot);
if (!a6xx_gmu_gx_is_on(adreno_dev))
goto dtcm;
/* Set fence to ALLOW mode so registers can be read */
kgsl_regwrite(device, A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
/* Make sure the previous write posted before reading */
wmb();
adreno_snapshot_registers(device, snapshot,
a6xx_gmu_gx_registers,
ARRAY_SIZE(a6xx_gmu_gx_registers) / 2);
/* A stalled SMMU can lead to NoC timeouts when host accesses DTCM */
if (adreno_smmu_is_stalled(adreno_dev)) {
dev_err(&gmu->pdev->dev,
"Not dumping dtcm because SMMU is stalled\n");
return;
}
dtcm:
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, a6xx_gmu_snapshot_dtcm, gmu);
}

View File

@ -0,0 +1,852 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/delay.h>
#include <linux/nvmem-consumer.h>
#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_a6xx_hfi.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
/* Below section is for all structures related to HFI queues */
#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT
/* Total header sizes + queue sizes + 16 for alignment */
#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))
struct a6xx_hfi *to_a6xx_hfi(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
return &gmu->hfi;
}
/* Sizes in the functions below are in units of dwords */
int a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, uint32_t queue_idx,
unsigned int *output, unsigned int max_size)
{
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
struct hfi_queue_table *tbl = mem_addr->hostptr;
struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
uint32_t *queue;
uint32_t msg_hdr;
uint32_t i, read;
uint32_t size;
int result = 0;
if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
return -EINVAL;
if (hdr->read_index == hdr->write_index)
return -ENODATA;
/* Clear the output data before populating */
memset(output, 0, max_size);
queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
msg_hdr = queue[hdr->read_index];
size = MSG_HDR_GET_SIZE(msg_hdr);
if (size > (max_size >> 2)) {
dev_err(&gmu->pdev->dev,
"HFI message too big: hdr:0x%x rd idx=%d\n",
msg_hdr, hdr->read_index);
result = -EMSGSIZE;
goto done;
}
read = hdr->read_index;
if (read < hdr->queue_size) {
for (i = 0; i < size && i < (max_size >> 2); i++) {
output[i] = queue[read];
read = (read + 1)%hdr->queue_size;
}
result = size;
} else {
/* In case FW messed up */
dev_err(&gmu->pdev->dev,
"Read index %d greater than queue size %d\n",
hdr->read_index, hdr->queue_size);
result = -ENODATA;
}
if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2)
read = ALIGN(read, SZ_4) % hdr->queue_size;
/* For acks, trace the packet for which this ack was sent */
if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
MSG_HDR_GET_SIZE(output[1]),
MSG_HDR_GET_SEQNUM(output[1]));
else
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));
hfi_update_read_idx(hdr, read);
done:
return result;
}
/* Sizes in the functions below are in units of dwords */
int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, uint32_t queue_idx,
uint32_t *msg, u32 size_bytes)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
uint32_t *queue;
uint32_t i, write_idx, read_idx, empty_space;
uint32_t size_dwords = size_bytes >> 2;
u32 align_size = ALIGN(size_dwords, SZ_4);
uint32_t id = MSG_HDR_GET_ID(*msg);
if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
return -EINVAL;
queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
write_idx = hdr->write_index;
read_idx = hdr->read_index;
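/*
* Free space in the ring: require strictly more room than the aligned
* packet so write_index never catches up to read_index, keeping the
* "empty" condition (write_index == read_index) unambiguous.
*/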
empty_space = (write_idx >= read_idx) ?
(hdr->queue_size - (write_idx - read_idx))
: (read_idx - write_idx);
if (empty_space <= align_size)
return -ENOSPC;
for (i = 0; i < size_dwords; i++) {
queue[write_idx] = msg[i];
write_idx = (write_idx + 1) % hdr->queue_size;
}
/* Fill any unused space at the end of the write buffer with a cookie value */
if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2) {
for (; i < align_size; i++) {
queue[write_idx] = 0xFAFAFAFA;
write_idx = (write_idx + 1) % hdr->queue_size;
}
}
trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
hfi_update_write_idx(&hdr->write_index, write_idx);
return 0;
}
int a6xx_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
{
int ret;
ret = a6xx_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);
/*
* Memory barrier to make sure packet and write index are written before
* an interrupt is raised
*/
wmb();
/* Send interrupt to GMU to receive the message */
if (!ret)
gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
A6XX_GMU_HOST2GMU_INTR_SET,
0x1);
return ret;
}
/* Sizes of the queue and message are in units of dwords */
static void init_queues(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
int i;
struct hfi_queue_table *tbl;
struct hfi_queue_header *hdr;
struct {
unsigned int idx;
unsigned int pri;
unsigned int status;
} queue[HFI_QUEUE_MAX] = {
{ HFI_CMD_IDX, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
{ HFI_MSG_IDX, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
{ HFI_DBG_IDX, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
};
/*
* Overwrite the queue IDs for A630, A615 and A616 as they use
* legacy firmware. Legacy firmware has different queue IDs for
* message, debug and dispatch queues (dispatch queues aren't used
* on these targets so the queue idx value update is not needed).
*/
if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
queue[HFI_MSG_ID].idx = HFI_MSG_IDX_LEGACY;
queue[HFI_DBG_ID].idx = HFI_DBG_IDX_LEGACY;
}
/* Fill Table Header */
tbl = mem_addr->hostptr;
tbl->qtbl_hdr.version = 0;
tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;
memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));
/* Fill Individual Queue Headers */
for (i = 0; i < HFI_QUEUE_MAX; i++) {
hdr = &tbl->qhdr[i];
hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
hdr->status = queue[i].status;
hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
}
}
int a6xx_hfi_init(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct a6xx_hfi *hfi = &gmu->hfi;
/* Allocates & maps memory for HFI */
if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
hfi->hfi_mem = reserve_gmu_kernel_block(gmu, 0, HFIMEM_SIZE,
GMU_NONCACHED_KERNEL, 0);
if (!IS_ERR(hfi->hfi_mem))
init_queues(adreno_dev);
}
return PTR_ERR_OR_ZERO(hfi->hfi_mem);
}
int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd,
struct pending_cmd *ret_cmd)
{
struct adreno_device *adreno_dev = a6xx_gmu_to_adreno(gmu);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
uint32_t *ack = rcvd;
uint32_t hdr = ack[0];
uint32_t req_hdr = ack[1];
if (ret_cmd == NULL)
return -EINVAL;
if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) {
memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
return 0;
}
/* Didn't find the sender, list the waiter */
dev_err_ratelimited(&gmu->pdev->dev,
"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
req_hdr, ret_cmd->sent_hdr);
gmu_core_fault_snapshot(device);
return -ENODEV;
}
static int poll_gmu_reg(struct adreno_device *adreno_dev,
u32 offsetdwords, unsigned int expected_val,
unsigned int mask, unsigned int timeout_ms)
{
unsigned int val;
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
u64 ao_pre_poll, ao_post_poll;
bool nmi = false;
ao_pre_poll = a6xx_read_alwayson(adreno_dev);
/* FIXME: readl_poll_timeout? */
while (time_is_after_jiffies(timeout)) {
gmu_core_regread(device, offsetdwords, &val);
if ((val & mask) == expected_val)
return 0;
/*
* If GMU firmware fails any assertion, error message is sent
* to KMD and NMI is triggered. So check if GMU is in NMI and
* timeout early. Bits [11:9] of A6XX_GMU_CM3_FW_INIT_RESULT
* contain GMU reset status. Non zero value here indicates that
* GMU reset is active, NMI handler would eventually complete
* and GMU would wait for recovery.
*/
gmu_core_regread(device, A6XX_GMU_CM3_FW_INIT_RESULT, &val);
if (val & 0xE00) {
nmi = true;
break;
}
usleep_range(10, 100);
}
ao_post_poll = a6xx_read_alwayson(adreno_dev);
/* Check one last time */
gmu_core_regread(device, offsetdwords, &val);
if ((val & mask) == expected_val)
return 0;
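/* The always-on counter ticks at ~19.2MHz (~52ns/tick), so ticks * 52 / USEC_PER_SEC gives milliseconds */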
dev_err(&gmu->pdev->dev, "kgsl hfi poll %s: always on: %lld ms\n",
nmi ? "abort" : "timeout",
div_u64((ao_post_poll - ao_pre_poll) * 52, USEC_PER_SEC));
return -ETIMEDOUT;
}
static int a6xx_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int rc;
uint32_t *cmd = data;
struct a6xx_hfi *hfi = &gmu->hfi;
unsigned int seqnum = atomic_inc_return(&hfi->seqnum);
*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
if (ret_cmd == NULL)
return a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
ret_cmd->sent_hdr = cmd[0];
rc = a6xx_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
if (rc)
return rc;
rc = poll_gmu_reg(adreno_dev, A6XX_GMU_GMU2HOST_INTR_INFO,
HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);
if (rc) {
gmu_core_fault_snapshot(device);
dev_err(&gmu->pdev->dev,
"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
return rc;
}
/* Clear the interrupt */
gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR,
HFI_IRQ_MSGQ_MASK);
rc = a6xx_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);
return rc;
}
int a6xx_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
{
struct pending_cmd ret_cmd;
int rc;
memset(&ret_cmd, 0, sizeof(ret_cmd));
rc = a6xx_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
if (rc)
return rc;
if (ret_cmd.results[2]) {
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
gmu_core_fault_snapshot(device);
dev_err(&gmu->pdev->dev,
"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
ret_cmd.results[1],
ret_cmd.results[2]);
return -EINVAL;
}
return 0;
}
static int a6xx_hfi_send_gmu_init(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct hfi_gmu_init_cmd cmd = {
.seg_id = 0,
.dbg_buffer_addr = (unsigned int) gmu->dump_mem->gmuaddr,
.dbg_buffer_size = (unsigned int) gmu->dump_mem->size,
.boot_state = 0x1,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_INIT);
if (ret)
return ret;
return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
static int a6xx_hfi_get_fw_version(struct adreno_device *adreno_dev,
uint32_t expected_ver, uint32_t *ver)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct hfi_fw_version_cmd cmd = {
.supported_ver = expected_ver,
};
int rc;
struct pending_cmd ret_cmd;
rc = CMD_MSG_HDR(cmd, H2F_MSG_FW_VER);
if (rc)
return rc;
memset(&ret_cmd, 0, sizeof(ret_cmd));
rc = a6xx_hfi_send_cmd_wait_inline(adreno_dev, &cmd, sizeof(cmd), &ret_cmd);
if (rc)
return rc;
rc = ret_cmd.results[2];
if (!rc)
*ver = ret_cmd.results[3];
else
dev_err(&gmu->pdev->dev,
"gmu get fw ver failed with error=%d\n", rc);
return rc;
}
int a6xx_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
{
struct hfi_core_fw_start_cmd cmd = {
.handle = 0x0,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
if (ret)
return ret;
return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
static const char *feature_to_string(uint32_t feature)
{
if (feature == HFI_FEATURE_ACD)
return "ACD";
else if (feature == HFI_FEATURE_LM)
return "LM";
return "unknown";
}
int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
uint32_t feature, uint32_t enable, uint32_t data)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct hfi_feature_ctrl_cmd cmd = {
.feature = feature,
.enable = enable,
.data = data,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
if (ret)
return ret;
ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
if (ret)
dev_err(&gmu->pdev->dev,
"Unable to %s feature %s (%d)\n",
enable ? "enable" : "disable",
feature_to_string(feature),
feature);
return ret;
}
int a6xx_hfi_send_set_value(struct adreno_device *adreno_dev,
u32 type, u32 subtype, u32 data)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct hfi_set_value_cmd cmd = {
.type = type,
.subtype = subtype,
.data = data,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
if (ret)
return ret;
ret = a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
if (ret)
dev_err(&gmu->pdev->dev,
"Unable to set HFI Value %d, %d to %d, error = %d\n",
type, subtype, data, ret);
return ret;
}
static int a6xx_hfi_send_dcvstbl_v1(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table;
struct hfi_dcvstable_v1_cmd cmd = {
.gpu_level_num = table->gpu_level_num,
.gmu_level_num = table->gmu_level_num,
};
int i, ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_PERF_TBL);
if (ret)
return ret;
for (i = 0; i < table->gpu_level_num; i++) {
cmd.gx_votes[i].vote = table->gx_votes[i].vote;
cmd.gx_votes[i].freq = table->gx_votes[i].freq;
}
cmd.cx_votes[0].vote = table->cx_votes[0].vote;
cmd.cx_votes[0].freq = table->cx_votes[0].freq;
cmd.cx_votes[1].vote = table->cx_votes[1].vote;
cmd.cx_votes[1].freq = table->cx_votes[1].freq;
return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
static int a6xx_hfi_send_test(struct adreno_device *adreno_dev)
{
struct hfi_test_cmd cmd;
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_TEST);
if (ret)
return ret;
cmd.data = 0;
return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
void adreno_a6xx_receive_err_req(struct a6xx_gmu_device *gmu, void *rcvd)
{
struct hfi_err_cmd *cmd = rcvd;
dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
((cmd->error_code >> 16) & 0xFFFF),
(cmd->error_code & 0xFFFF),
(char *) cmd->data);
}
void adreno_a6xx_receive_debug_req(struct a6xx_gmu_device *gmu, void *rcvd)
{
struct hfi_debug_cmd *cmd = rcvd;
dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
cmd->type, cmd->timestamp, cmd->data);
}
static void a6xx_hfi_v1_receiver(struct a6xx_gmu_device *gmu, uint32_t *rcvd,
struct pending_cmd *ret_cmd)
{
/* V1 ACK Handler */
if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_V1_MSG_ACK) {
a6xx_receive_ack_cmd(gmu, rcvd, ret_cmd);
return;
}
/* V1 Request Handler */
switch (MSG_HDR_GET_ID(rcvd[0])) {
case F2H_MSG_ERR: /* No Reply */
adreno_a6xx_receive_err_req(gmu, rcvd);
break;
case F2H_MSG_DEBUG: /* No Reply */
adreno_a6xx_receive_debug_req(gmu, rcvd);
break;
default: /* No Reply */
dev_err(&gmu->pdev->dev,
"HFI V1 request %d not supported\n",
MSG_HDR_GET_ID(rcvd[0]));
break;
}
}
int a6xx_hfi_process_queue(struct a6xx_gmu_device *gmu,
uint32_t queue_idx, struct pending_cmd *ret_cmd)
{
uint32_t rcvd[MAX_RCVD_SIZE];
while (a6xx_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
/* Special case if we're v1 */
if (GMU_VER_MAJOR(gmu->ver.hfi) < 2) {
a6xx_hfi_v1_receiver(gmu, rcvd, ret_cmd);
continue;
}
/* V2 ACK Handler */
if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
int ret = a6xx_receive_ack_cmd(gmu, rcvd, ret_cmd);
if (ret)
return ret;
continue;
}
/* V2 Request Handler */
switch (MSG_HDR_GET_ID(rcvd[0])) {
case F2H_MSG_ERR: /* No Reply */
adreno_a6xx_receive_err_req(gmu, rcvd);
break;
case F2H_MSG_DEBUG: /* No Reply */
adreno_a6xx_receive_debug_req(gmu, rcvd);
break;
default: /* No Reply */
dev_err(&gmu->pdev->dev,
"HFI request %d not supported\n",
MSG_HDR_GET_ID(rcvd[0]));
break;
}
}
return 0;
}
static int a6xx_hfi_verify_fw_version(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
int result;
unsigned int ver, major, minor;
/* GMU version is already known, so don't waste time finding again */
if (gmu->ver.core != 0)
return 0;
major = a6xx_core->gmu_major;
minor = a6xx_core->gmu_minor;
result = a6xx_hfi_get_fw_version(adreno_dev, GMU_VERSION(major, minor, 0),
&ver);
if (result) {
dev_err_once(&gmu->pdev->dev,
"Failed to get FW version via HFI\n");
return result;
}
/* For now, warn once. Could return error later if needed */
if (major != GMU_VER_MAJOR(ver))
dev_err_once(&gmu->pdev->dev,
"FW Major Error: Wanted %d, got %d\n",
major, GMU_VER_MAJOR(ver));
if (minor > GMU_VER_MINOR(ver))
dev_err_once(&gmu->pdev->dev,
"FW Minor Error: Wanted < %d, got %d\n",
GMU_VER_MINOR(ver), minor);
/* Save the gmu version information */
gmu->ver.core = ver;
return 0;
}
int a6xx_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
{
int ret;
if (!adreno_dev->bcl_enabled)
return 0;
ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, 0);
return ret;
}
int a6xx_hfi_send_lm_feature_ctrl(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct hfi_set_value_cmd req;
u32 slope = 0;
int ret;
if (!adreno_dev->lm_enabled)
return 0;
memset(&req, 0, sizeof(req));
nvmem_cell_read_u32(&device->pdev->dev, "isense_slope", &slope);
ret = CMD_MSG_HDR(req, H2F_MSG_SET_VALUE);
if (ret)
return ret;
req.type = HFI_VALUE_LM_CS0;
req.subtype = 0;
req.data = slope;
ret = a6xx_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_LM, 1,
device->pwrctrl.throttle_mask);
if (!ret)
ret = a6xx_hfi_send_generic_req(adreno_dev, &req, sizeof(req));
return ret;
}
int a6xx_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
int ret = 0;
if (adreno_dev->acd_enabled) {
ret = a6xx_hfi_send_generic_req(adreno_dev,
&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
if (!ret)
ret = a6xx_hfi_send_feature_ctrl(adreno_dev,
HFI_FEATURE_ACD, 1, 0);
}
return ret;
}
static void reset_hfi_queues(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
struct hfi_queue_table *tbl = mem_addr->hostptr;
struct hfi_queue_header *hdr;
unsigned int i;
/* Flush HFI queues */
for (i = 0; i < HFI_QUEUE_MAX; i++) {
hdr = &tbl->qhdr[i];
if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
continue;
hdr->read_index = hdr->write_index;
}
}
int a6xx_hfi_start(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int result;
reset_hfi_queues(adreno_dev);
/* This is legacy HFI message for A630 and A615 family firmware */
if (adreno_is_a630(adreno_dev) || adreno_is_a615_family(adreno_dev)) {
result = a6xx_hfi_send_gmu_init(adreno_dev);
if (result)
goto err;
}
result = a6xx_hfi_verify_fw_version(adreno_dev);
if (result)
goto err;
if (GMU_VER_MAJOR(gmu->ver.hfi) < 2)
result = a6xx_hfi_send_dcvstbl_v1(adreno_dev);
else
result = a6xx_hfi_send_generic_req(adreno_dev,
&gmu->hfi.dcvs_table, sizeof(gmu->hfi.dcvs_table));
if (result)
goto err;
result = a6xx_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
sizeof(gmu->hfi.bw_table));
if (result)
goto err;
/*
* If quirk is enabled send H2F_MSG_TEST and tell the GMU
* we are sending no more HFIs until the next boot otherwise
* send H2F_MSG_CORE_FW_START and features for A640 devices
*/
if (GMU_VER_MAJOR(gmu->ver.hfi) >= 2) {
result = a6xx_hfi_send_acd_feature_ctrl(adreno_dev);
if (result)
goto err;
result = a6xx_hfi_send_lm_feature_ctrl(adreno_dev);
if (result)
goto err;
result = a6xx_hfi_send_bcl_feature_ctrl(adreno_dev);
if (result)
goto err;
result = a6xx_hfi_send_core_fw_start(adreno_dev);
if (result)
goto err;
} else {
if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_HFI_USE_REG)) {
result = a6xx_hfi_send_test(adreno_dev);
if (result)
goto err;
}
}
set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
/* Request default DCVS level */
result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
if (result)
goto err;
/* Request default BW vote */
result = kgsl_pwrctrl_axi(device, true);
err:
if (result)
a6xx_hfi_stop(adreno_dev);
return result;
}
void a6xx_hfi_stop(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
kgsl_pwrctrl_axi(device, false);
clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
}
/* HFI interrupt handler */
irqreturn_t a6xx_hfi_irq_handler(int irq, void *data)
{
struct kgsl_device *device = data;
struct a6xx_gmu_device *gmu = to_a6xx_gmu(ADRENO_DEVICE(device));
unsigned int status = 0;
gmu_core_regread(device, A6XX_GMU_GMU2HOST_INTR_INFO, &status);
gmu_core_regwrite(device, A6XX_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);
if (status & HFI_IRQ_DBGQ_MASK)
a6xx_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
if (status & HFI_IRQ_CM3_FAULT_MASK) {
dev_err_ratelimited(&gmu->pdev->dev,
"GMU CM3 fault interrupt received\n");
atomic_set(&gmu->cm3_fault, 1);
/* make sure other CPUs see the update */
smp_wmb();
}
if (status & ~HFI_IRQ_MASK)
dev_err_ratelimited(&gmu->pdev->dev,
"Unhandled HFI interrupts 0x%lx\n",
status & ~HFI_IRQ_MASK);
return IRQ_HANDLED;
}

View File

@ -0,0 +1,188 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_A6XX_HFI_H
#define __ADRENO_A6XX_HFI_H
#include "adreno_hfi.h"
/**
* struct a6xx_hfi - HFI control structure
* @seqnum: atomic counter that is incremented for each message sent. The
* value of the counter is used as sequence number for HFI message
* @bw_table: HFI BW table buffer
* @acd_table: HFI table for ACD data
*/
struct a6xx_hfi {
/** @irq: HFI interrupt line */
int irq;
atomic_t seqnum;
/** @hfi_mem: Memory descriptor for the hfi memory */
struct kgsl_memdesc *hfi_mem;
struct hfi_bwtable_cmd bw_table;
struct hfi_acd_table_cmd acd_table;
/** @dcvs_table: HFI table for gpu dcvs levels */
struct hfi_dcvstable_cmd dcvs_table;
};
struct a6xx_gmu_device;
/* a6xx_hfi_irq_handler - IRQ handler for HFI interrupts */
irqreturn_t a6xx_hfi_irq_handler(int irq, void *data);
/**
* a6xx_hfi_start - Send the various HFIs during device boot up
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_start(struct adreno_device *adreno_dev);
/**
* a6xx_hfi_stop - Stop HFI communication with the gmu
* @adreno_dev: Pointer to the adreno device
*
* Release the default bw vote and mark HFI as stopped
*/
void a6xx_hfi_stop(struct adreno_device *adreno_dev);
/**
* a6xx_hfi_init - Initialize hfi resources
* @adreno_dev: Pointer to the adreno device
*
* This function allocates and sets up hfi queues
* when a process creates the very first kgsl instance
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_init(struct adreno_device *adreno_dev);
/* Helper function to get to a6xx hfi struct from adreno device */
struct a6xx_hfi *to_a6xx_hfi(struct adreno_device *adreno_dev);
/**
* a6xx_hfi_queue_write - Write a command to hfi queue
* @adreno_dev: Pointer to the adreno device
* @queue_idx: destination queue id
* @msg: Data to be written to the queue
* @size_bytes: Size of the command in bytes
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
u32 *msg, u32 size_bytes);
/**
* a6xx_hfi_queue_read - Read data from hfi queue
* @gmu: Pointer to the a6xx gmu device
* @queue_idx: queue id to read from
* @output: Pointer to read the data into
* @max_size: Size of the output buffer in bytes
*
* Return: Number of dwords read on success or negative error on failure
*/
int a6xx_hfi_queue_read(struct a6xx_gmu_device *gmu, u32 queue_idx,
u32 *output, u32 max_size);
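/*
* Illustrative usage sketch (not part of the driver): callers drain a queue
* into a local dword buffer, as a6xx_hfi_process_queue() does:
*
*	u32 rcvd[MAX_RCVD_SIZE];
*
*	while (a6xx_hfi_queue_read(gmu, HFI_MSG_ID, rcvd, sizeof(rcvd)) > 0)
*		;	// handle rcvd[0] (header) and the payload here
*/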
/**
* a6xx_receive_ack_cmd - Process ack type packets
* @gmu: Pointer to the a6xx gmu device
* @rcvd: Pointer to the data read from hfi queue
* @ret_cmd: Container for the hfi packet for which this ack is received
*
* Return: 0 on success or negative error on failure
*/
int a6xx_receive_ack_cmd(struct a6xx_gmu_device *gmu, void *rcvd,
struct pending_cmd *ret_cmd);
/**
* a6xx_hfi_send_feature_ctrl - Enable gmu feature via hfi
* @adreno_dev: Pointer to the adreno device
* @feature: feature to be enabled or disabled
* @enable: Set 1 to enable or 0 to disable a feature
* @data: payload for the send feature hfi packet
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
u32 feature, u32 enable, u32 data);
/**
* a6xx_hfi_send_set_value - Send gmu set_values via hfi
* @adreno_dev: Pointer to the adreno device
* @type: GMU set_value type
* @subtype: GMU set_value subtype
* @data: Value to set
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_set_value(struct adreno_device *adreno_dev,
u32 type, u32 subtype, u32 data);
/**
* a6xx_hfi_send_core_fw_start - Send the core fw start hfi
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
/**
* a6xx_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
/**
* a6xx_hfi_send_lm_feature_ctrl - Send the lm feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_lm_feature_ctrl(struct adreno_device *adreno_dev);
/**
* a6xx_hfi_send_generic_req - Send a generic hfi packet
* @adreno_dev: Pointer to the adreno device
* @cmd: Pointer to the hfi packet header and data
* @size_bytes: Size of the command in bytes
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);
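/*
* Illustrative usage sketch (not part of the driver): a typical caller fills
* a command struct, stamps the header with CMD_MSG_HDR() and then sends it,
* mirroring a6xx_hfi_send_core_fw_start():
*
*	struct hfi_core_fw_start_cmd cmd = { .handle = 0x0 };
*	int ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
*
*	if (ret)
*		return ret;
*	return a6xx_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
*/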
/**
* a6xx_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
/*
* a6xx_hfi_process_queue - Check hfi queue for messages from gmu
* @gmu: Pointer to the a6xx gmu device
* @queue_idx: queue id to be processed
* @ret_cmd: Container for data needed for waiting for the ack
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_process_queue(struct a6xx_gmu_device *gmu,
u32 queue_idx, struct pending_cmd *ret_cmd);
/**
* a6xx_hfi_cmdq_write - Write a command to command queue
* @adreno_dev: Pointer to the adreno device
* @msg: Data to be written to the queue
* @size_bytes: Size of the command in bytes
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
void adreno_a6xx_receive_err_req(struct a6xx_gmu_device *gmu, void *rcvd);
void adreno_a6xx_receive_debug_req(struct a6xx_gmu_device *gmu, void *rcvd);
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,100 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_A6XX_HWSCHED_H_
#define _ADRENO_A6XX_HWSCHED_H_
#include "adreno_a6xx_hwsched_hfi.h"
/**
* struct a6xx_hwsched_device - Container for the a6xx hwscheduling device
*/
struct a6xx_hwsched_device {
/** @a6xx_dev: Container for the a6xx device */
struct a6xx_device a6xx_dev;
/** @hwsched_hfi: Container for hwscheduling specific hfi resources */
struct a6xx_hwsched_hfi hwsched_hfi;
};
/**
* a6xx_hwsched_probe - Target specific probe for hwsched
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target specific probe function for hwsched enabled gmu targets.
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hwsched_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* a6xx_hwsched_reset_replay - Restart the gmu and gpu
* @adreno_dev: Pointer to the adreno device
*
* Restart the GMU and GPU and replay the inflight commands
* Return: 0 on success or negative error on failure
*/
int a6xx_hwsched_reset_replay(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_snapshot - take a6xx hwsched snapshot
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot instance
*
* Snapshot the faulty IB and then the rest of the a6xx gmu state
*/
void a6xx_hwsched_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* a6xx_hwsched_handle_watchdog - Handle watchdog interrupt
* @adreno_dev: Pointer to the adreno device
*/
void a6xx_hwsched_handle_watchdog(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_active_count_get - Increment the active count
* @adreno_dev: Pointer to the adreno device
*
* This function increments the active count. If active count
* is 0, this function also powers up the device.
*
* Return: 0 on success or negative error on failure
*/
int a6xx_hwsched_active_count_get(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_active_count_put - Put back the active count
* @adreno_dev: Pointer to the adreno device
*
* This function decrements the active count and sets the idle
* timer if the active count reaches zero.
*/
void a6xx_hwsched_active_count_put(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_add_to_minidump - Register hwsched_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int a6xx_hwsched_add_to_minidump(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU
* @adreno_dev: Pointer to adreno device structure
* @cmdobj: The command object which needs to be submitted
*
* This function is used to register the context if needed and submit
* recurring IBs to the GMU. Upon receiving an IPC interrupt, the GMU submits
* the recurring IBs to the GPU.
* Return: 0 on success and negative error on failure
*/
int a6xx_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj);
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,152 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_A6XX_HWSCHED_HFI_H_
#define _ADRENO_A6XX_HWSCHED_HFI_H_
struct a6xx_hwsched_hfi {
struct hfi_mem_alloc_entry mem_alloc_table[32];
u32 mem_alloc_entries;
/** @irq_mask: Store the hfi interrupt mask */
u32 irq_mask;
/** @msglock: To protect the list of un-ACKed hfi packets */
rwlock_t msglock;
/** @msglist: List of un-ACKed hfi packets */
struct list_head msglist;
/** @f2h_task: Task for processing gmu fw to host packets */
struct task_struct *f2h_task;
/** @f2h_wq: Waitqueue for the f2h_task */
wait_queue_head_t f2h_wq;
/** @big_ib: GMU buffer to hold big IBs */
struct kgsl_memdesc *big_ib;
/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
struct kgsl_memdesc *big_ib_recurring;
/** @perfctr_scratch: Buffer to hold perfcounter PM4 commands */
struct kgsl_memdesc *perfctr_scratch;
/** @msgq_mutex: Mutex for accessing the msgq */
struct mutex msgq_mutex;
};
struct kgsl_drawobj_cmd;
/**
* a6xx_hwsched_hfi_probe - Probe hwsched hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* Return: 0 on success and negative error on failure.
*/
int a6xx_hwsched_hfi_probe(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_hfi_remove - Release hwsched hfi resources
* @adreno_dev: Pointer to adreno device structure
*/
void a6xx_hwsched_hfi_remove(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_hfi_init - Initialize hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to initialize hfi resources
* once before the very first gmu boot
*
* Return: 0 on success and negative error on failure.
*/
int a6xx_hwsched_hfi_init(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_hfi_start - Start hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* Send the various hfi packets before booting the gpu
*
* Return: 0 on success and negative error on failure.
*/
int a6xx_hwsched_hfi_start(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_hfi_stop - Stop the hfi resources
* @adreno_dev: Pointer to the adreno device
*
* This function does the hfi cleanup when powering down the gmu
*/
void a6xx_hwsched_hfi_stop(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_cp_init - Send CP_INIT via HFI
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to send the CP_INIT packet and bring the
* GPU out of secure mode using HFI raw packets.
*
* Return: 0 on success and negative error on failure.
*/
int a6xx_hwsched_cp_init(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_counter_inline_enable - Configure a performance counter for a countable
* @adreno_dev: Adreno device to configure
* @group: Desired performance counter group
* @counter: Desired performance counter in the group
* @countable: Desired countable
*
* Physically set up a counter within a group with the desired countable.
*
* Return: 0 on success or negative error on failure.
*/
int a6xx_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
u32 counter, u32 countable);
/**
* a6xx_hfi_send_cmd_async - Send an hfi packet
* @adreno_dev: Pointer to adreno device structure
* @data: Data to be sent in the hfi packet
* @size_bytes: Size of the packet in bytes
*
* Send data in the form of an HFI packet to the GMU and wait for
* its ack asynchronously.
*
* Return: 0 on success and negative error on failure.
*/
int a6xx_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
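/*
 * Illustrative call pattern (the packet type and message id below are
 * hypothetical placeholders, not part of this driver): a caller builds a
 * complete HFI packet on the stack, fills in its header and then waits
 * for the GMU ack:
 *
 *	struct hfi_example_cmd cmd = { 0 };
 *	int ret = CMD_MSG_HDR(cmd, H2F_MSG_EXAMPLE);
 *
 *	if (!ret)
 *		ret = a6xx_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
 */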
/**
* a6xx_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
* @adreno_dev: Pointer to adreno device structure
* @drawobj: The command draw object which needs to be submitted
*
* This function is used to register the context if needed and submit
* IBs to the hfi dispatch queues.
* Return: 0 on success and negative error on failure
*/
int a6xx_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
struct kgsl_drawobj *drawobj);
/**
* a6xx_hwsched_context_detach - Unregister a context with GMU
* @drawctxt: Pointer to the adreno context
*
* This function sends context unregister HFI and waits for the ack
* to ensure all submissions from this context have retired
*/
void a6xx_hwsched_context_detach(struct adreno_context *drawctxt);
/* Helper function to get to a6xx hwsched hfi device from adreno device */
struct a6xx_hwsched_hfi *to_a6xx_hwsched_hfi(struct adreno_device *adreno_dev);
/**
* a6xx_hwsched_preempt_count_get - Get preemption count from GMU
* @adreno_dev: Pointer to adreno device
*
* This function sends a GET_VALUE HFI packet to get the number of
* preemptions completed since last SLUMBER exit.
*
* Return: Preemption count
*/
u32 a6xx_hwsched_preempt_count_get(struct adreno_device *adreno_dev);
#endif

File diff suppressed because it is too large


@ -0,0 +1,793 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2017-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_pm4types.h"
#include "adreno_trace.h"
#define PREEMPT_RECORD(_field) \
offsetof(struct a6xx_cp_preemption_record, _field)
#define PREEMPT_SMMU_RECORD(_field) \
offsetof(struct a6xx_cp_smmu_info, _field)
static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer,
bool atomic)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&rb->preempt_lock, flags);
if (!atomic) {
/*
* We might have skipped updating the wptr in case we are in
* dispatcher context. Do it now.
*/
if (rb->skip_inline_wptr) {
ret = a6xx_fenced_write(adreno_dev,
A6XX_CP_RB_WPTR, rb->wptr,
FENCE_STATUS_WRITEDROPPED0_MASK);
reset_timer = true;
rb->skip_inline_wptr = false;
}
} else {
unsigned int wptr;
kgsl_regread(device, A6XX_CP_RB_WPTR, &wptr);
if (wptr != rb->wptr) {
kgsl_regwrite(device, A6XX_CP_RB_WPTR, rb->wptr);
reset_timer = true;
}
}
if (reset_timer)
rb->dispatch_q.expires = jiffies +
msecs_to_jiffies(adreno_drawobj_timeout);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!atomic) {
/* If WPTR update fails, set the fault and trigger recovery */
if (ret) {
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
}
}
}
static void _power_collapse_set(struct adreno_device *adreno_dev, bool val)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (!gmu_core_isenabled(device))
return;
if (val) {
if (adreno_is_a660(adreno_dev) ||
adreno_is_a663(adreno_dev))
gmu_core_regwrite(device,
A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, 0x1);
else
gmu_core_regrmw(device,
A6XX_GMU_AO_SPARE_CNTL, 0x0, 0x2);
} else {
if (adreno_is_a660(adreno_dev) ||
adreno_is_a663(adreno_dev))
gmu_core_regwrite(device,
A6XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, 0x0);
else
gmu_core_regrmw(device,
A6XX_GMU_AO_SPARE_CNTL, 0x2, 0x0);
}
}
static void _a6xx_preemption_done(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
/*
* In the very unlikely case that the power is off, do nothing - the
* state will be reset on power up and everybody will be happy
*/
if (!kgsl_state_is_awake(device))
return;
kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &status);
if (status & 0x1) {
dev_err(device->dev,
"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
status, adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
/* Set a fault and restart */
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
return;
}
adreno_dev->preempt.count++;
/*
* In normal scenarios, the preemption keepalive bit is cleared in the
* CP interrupt callback. However, if preemption completes right after
* the preemption timer expires, or a preemption interrupt arrives with
* a non-zero status, the state transitions to the complete state and
* this function is called once the dispatcher is scheduled.
* We can now safely clear the preemption keepalive bit, allowing
* power collapse to resume its regular activity.
*/
_power_collapse_set(adreno_dev, false);
del_timer_sync(&adreno_dev->preempt.timer);
kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
status, 0);
/* Clean up all the bits */
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr for the new command queue */
_update_wptr(adreno_dev, true, false);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
/* Clear the preempt state */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
}
static void _a6xx_preemption_fault(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
/*
* If the power is on check the preemption status one more time - if it
* was successful then just transition to the complete state
*/
if (kgsl_state_is_awake(device)) {
kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &status);
if (!(status & 0x1)) {
adreno_set_preempt_state(adreno_dev,
ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(device);
return;
}
}
dev_err(device->dev,
"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
}
static void _a6xx_preemption_worker(struct work_struct *work)
{
struct adreno_preemption *preempt = container_of(work,
struct adreno_preemption, work);
struct adreno_device *adreno_dev = container_of(preempt,
struct adreno_device, preempt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/* Need to take the mutex to make sure that the power stays on */
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
_a6xx_preemption_fault(adreno_dev);
mutex_unlock(&device->mutex);
}
/* Find the highest priority active ringbuffer */
static struct adreno_ringbuffer *a6xx_next_ringbuffer(
struct adreno_device *adreno_dev)
{
struct adreno_ringbuffer *rb;
unsigned long flags;
unsigned int i;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
bool empty;
spin_lock_irqsave(&rb->preempt_lock, flags);
empty = adreno_rb_empty(rb);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!empty)
return rb;
}
return NULL;
}
void a6xx_preemption_trigger(struct adreno_device *adreno_dev, bool atomic)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *next;
uint64_t ttbr0, gpuaddr;
unsigned int contextidr, cntl;
unsigned long flags;
struct adreno_preemption *preempt = &adreno_dev->preempt;
/* Put ourselves into a possible trigger state */
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
return;
/* Get the next ringbuffer to preempt in */
next = a6xx_next_ringbuffer(adreno_dev);
/*
* Nothing to do if every ringbuffer is empty or if the current
* ringbuffer is the only active one
*/
if (next == NULL || next == adreno_dev->cur_rb) {
/*
* Update any critical things that might have been skipped while
* we were looking for a new ringbuffer
*/
if (next != NULL) {
_update_wptr(adreno_dev, false, atomic);
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
return;
}
/* Turn off the dispatcher timer */
del_timer(&adreno_dev->dispatcher.timer);
/*
* This is the most critical section - we need to take care not to race
* until we have programmed the CP for the switch
*/
spin_lock_irqsave(&next->preempt_lock, flags);
/* Get the pagetable from the pagetable info. */
kgsl_sharedmem_readq(device->scratch, &ttbr0,
SCRATCH_RB_OFFSET(next->id, ttbr0));
kgsl_sharedmem_readl(device->scratch, &contextidr,
SCRATCH_RB_OFFSET(next->id, contextidr));
kgsl_sharedmem_writel(next->preemption_desc,
PREEMPT_RECORD(wptr), next->wptr);
spin_unlock_irqrestore(&next->preempt_lock, flags);
/* And write it to the smmu info */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), contextidr);
}
kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
next->id * sizeof(u64));
/*
* Set a keepalive bit before the first preemption register write.
* This is required since while each individual write to the context
* switch registers will wake the GPU from collapse, it will not in
* itself cause GPU activity. Thus, the GPU could technically be
* re-collapsed between subsequent register writes leading to a
* prolonged preemption sequence. The keepalive bit prevents any
* further power collapse while it is set.
* It is more efficient to use a keepalive+wake-on-fence approach here
* rather than an OOB. Both keepalive and the fence are effectively
* free when the GPU is already powered on, whereas an OOB requires an
* unconditional handshake with the GMU.
*/
_power_collapse_set(adreno_dev, true);
/*
* Fenced writes on this path will make sure the GPU is woken up
* in case it was power collapsed by the GMU.
*/
if (a6xx_fenced_write(adreno_dev,
A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
lower_32_bits(next->preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
/*
* The above fenced writes will make sure the GMU comes out of
* IFPC if it was in the IFPC state, but they do not guarantee
* that the GMU firmware has actually moved to the ACTIVE state,
* i.e. that the wake-up from IFPC is complete.
* Wait for the GMU to move to the ACTIVE state before triggering
* preemption. This is required to make sure the CP doesn't
* interrupt the GMU during the wake-up from IFPC.
*/
if (!atomic && gmu_core_dev_wait_for_active_transition(device))
goto err;
if (a6xx_fenced_write(adreno_dev,
A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
upper_32_bits(next->preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (a6xx_fenced_write(adreno_dev,
A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
lower_32_bits(next->secure_preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (a6xx_fenced_write(adreno_dev,
A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
upper_32_bits(next->secure_preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (a6xx_fenced_write(adreno_dev,
A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
lower_32_bits(gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (a6xx_fenced_write(adreno_dev,
A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
upper_32_bits(gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
adreno_dev->next_rb = next;
/* Start the timer to detect a stuck preemption */
mod_timer(&adreno_dev->preempt.timer,
jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
cntl = (preempt->preempt_level << 6) | 0x01;
/* Skip save/restore during L1 preemption */
if (preempt->skipsaverestore)
cntl |= (1 << 9);
/* Enable GMEM save/restore across preemption */
if (preempt->usesgmem)
cntl |= (1 << 8);
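/*
 * Example encoding (illustrative): an L1 preemption request with GMEM
 * save/restore enabled and save/restore not skipped yields
 * cntl = (1 << 6) | (1 << 8) | 0x01 = 0x141.
 */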
trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
cntl, 0);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
/* Trigger the preemption */
if (a6xx_fenced_write(adreno_dev, A6XX_CP_CONTEXT_SWITCH_CNTL, cntl,
FENCE_STATUS_WRITEDROPPED1_MASK)) {
adreno_dev->next_rb = NULL;
del_timer(&adreno_dev->preempt.timer);
goto err;
}
return;
err:
/* If fenced write fails, take inline snapshot and trigger recovery */
if (!atomic) {
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
} else {
adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
/* Clear the keep alive */
_power_collapse_set(adreno_dev, false);
}
void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
return;
kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_CNTL, &status);
if (status & 0x1) {
dev_err(KGSL_DEVICE(adreno_dev)->dev,
"preempt interrupt with non-zero status: %X\n",
status);
/*
* Under the assumption that this is a race between the
* interrupt and the register, schedule the worker to clean up.
* If the status still hasn't resolved itself by the time we get
* there then we have to assume something bad happened
*/
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
return;
}
adreno_dev->preempt.count++;
/*
* We can now safely clear the preemption keepalive bit, allowing
* power collapse to resume its regular activity.
*/
_power_collapse_set(adreno_dev, false);
del_timer(&adreno_dev->preempt.timer);
kgsl_regread(device, A6XX_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
status, 0);
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr if it changed while preemption was ongoing */
_update_wptr(adreno_dev, true, true);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
a6xx_preemption_trigger(adreno_dev, true);
}
void a6xx_preemption_schedule(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (!adreno_is_preemption_enabled(adreno_dev))
return;
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
_a6xx_preemption_done(adreno_dev);
a6xx_preemption_trigger(adreno_dev, false);
mutex_unlock(&device->mutex);
}
u32 a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds)
{
unsigned int *cmds_orig = cmds;
uint64_t gpuaddr = 0;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (drawctxt) {
gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15);
} else {
*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
}
/* NULL SMMU_INFO buffer - we track in KMD */
*cmds++ = SET_PSEUDO_SMMU_INFO;
cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr);
*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds,
rb->secure_preemption_desc->gpuaddr);
if (drawctxt) {
*cmds++ = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
}
/*
* There is no need to specify this address again when we are about to
* trigger preemption. This is because the CP internally copies the
* address given here in the CP_SET_PSEUDO_REGISTER payload into the
* context record, and thus knows where to restore the saved
* perfcounters for the new ringbuffer from.
*/
*cmds++ = SET_PSEUDO_COUNTER;
cmds += cp_gpuaddr(adreno_dev, cmds,
rb->perfcounter_save_restore_desc->gpuaddr);
if (drawctxt) {
struct adreno_ringbuffer *rb = drawctxt->rb;
uint64_t dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);
*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
cmds += cp_gpuaddr(adreno_dev, cmds, dest);
*cmds++ = lower_32_bits(gpuaddr);
*cmds++ = upper_32_bits(gpuaddr);
/* Add a KMD postamble to clear the perf counters during preemption */
if (!adreno_dev->perfcounter) {
u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
*cmds++ = lower_32_bits(kmd_postamble_addr);
*cmds++ = upper_32_bits(kmd_postamble_addr);
*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len));
}
}
return (unsigned int) (cmds - cmds_orig);
}
u32 a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
u32 *cmds)
{
u32 index = 0;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (adreno_dev->cur_rb) {
u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id);
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4);
cmds[index++] = lower_32_bits(dest);
cmds[index++] = upper_32_bits(dest);
cmds[index++] = 0;
cmds[index++] = 0;
}
cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = 1;
cmds[index++] = 0;
return index;
}
void a6xx_preemption_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *rb;
unsigned int i;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
/* Force the state to be clear */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
if (kgsl_mmu_is_perprocess(&device->mmu)) {
/* smmu_info is allocated and mapped in a6xx_preemption_init */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(magic), A6XX_CP_SMMU_INFO_MAGIC_REF);
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
/* The CP doesn't use the asid record, so poison it */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), 0);
kgsl_regwrite(device, A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
lower_32_bits(iommu->smmu_info->gpuaddr));
kgsl_regwrite(device, A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
upper_32_bits(iommu->smmu_info->gpuaddr));
}
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(rptr), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(wptr), 0);
adreno_ringbuffer_set_pagetable(device, rb,
device->mmu.defaultpagetable);
}
}
static void reset_rb_preempt_record(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
u32 cp_rb_cntl = A6XX_CP_RB_CNTL_DEFAULT |
(ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? 0 : (1 << 27));
memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(magic), A6XX_CP_CTXRECORD_MAGIC_REF);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(cntl), cp_rb_cntl);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(
KGSL_DEVICE(adreno_dev), rb->id, rptr));
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
}
void a6xx_reset_preempt_records(struct adreno_device *adreno_dev)
{
int i;
struct adreno_ringbuffer *rb;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
reset_rb_preempt_record(adreno_dev, rb);
}
}
static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_a6xx_core *a6xx_core = to_a6xx_core(adreno_dev);
u64 ctxt_record_size = A6XX_CP_CTXRECORD_SIZE_IN_BYTES;
int ret;
if (a6xx_core->ctxt_record_size)
ctxt_record_size = a6xx_core->ctxt_record_size;
ret = adreno_allocate_global(device, &rb->preemption_desc,
ctxt_record_size, SZ_16K, 0, KGSL_MEMDESC_PRIVILEGED,
"preemption_desc");
if (ret)
return ret;
ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
ctxt_record_size, 0, KGSL_MEMFLAGS_SECURE,
KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
if (ret)
return ret;
ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
KGSL_MEMDESC_PRIVILEGED,
"perfcounter_save_restore_desc");
if (ret)
return ret;
reset_rb_preempt_record(adreno_dev, rb);
return 0;
}
int a6xx_preemption_init(struct adreno_device *adreno_dev)
{
u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0;
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_preemption *preempt = &adreno_dev->preempt;
struct adreno_ringbuffer *rb;
int ret;
unsigned int i;
/* We are dependent on IOMMU to make preemption go on the CP side */
if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
return -ENODEV;
INIT_WORK(&preempt->work, _a6xx_preemption_worker);
/* Allocate mem for storing preemption switch record */
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
ret = a6xx_preemption_ringbuffer_init(adreno_dev, rb);
if (ret)
return ret;
}
ret = adreno_allocate_global(device, &preempt->scratch,
PAGE_SIZE, 0, 0, flags, "preempt_scratch");
if (ret)
return ret;
/* Allocate mem for storing preemption smmu record */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
"smmu_info");
if (ret)
return ret;
}
/*
* The first 28 dwords of the device scratch buffer are used to store shadow RB data.
* Reserve 11 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for
* the KMD postamble PM4 packets. These must live in device->scratch so that userspace
* cannot access them.
*/
if (!adreno_dev->perfcounter) {
u32 *postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET;
u32 count = 0;
postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_CMD;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
postamble[count++] = 0x3;
postamble[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = 0x1;
postamble[count++] = 0x0;
preempt->postamble_len = count;
}
set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
return 0;
}
int a6xx_preemption_context_init(struct kgsl_context *context)
{
struct kgsl_device *device = context->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
uint64_t flags = 0;
if (!adreno_preemption_feature_set(adreno_dev))
return 0;
if (context->flags & KGSL_CONTEXT_SECURE)
flags |= KGSL_MEMFLAGS_SECURE;
if (is_compat_task())
flags |= KGSL_MEMFLAGS_FORCE_32BIT;
/*
* gpumem_alloc_entry takes an extra refcount. Put it only when
* destroying the context to keep the context record valid
*/
context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
A6XX_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
if (IS_ERR(context->user_ctxt_record)) {
int ret = PTR_ERR(context->user_ctxt_record);
context->user_ctxt_record = NULL;
return ret;
}
return 0;
}

File diff suppressed because it is too large


@ -0,0 +1,106 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2018-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_A6XX_RGMU_H
#define __ADRENO_A6XX_RGMU_H
#define RGMU_AO_IRQ_FENCE_ERR BIT(3)
#define RGMU_AO_IRQ_MASK RGMU_AO_IRQ_FENCE_ERR
#define RGMU_OOB_IRQ_ERR_MSG BIT(24)
#define RGMU_OOB_IRQ_ACK_MASK GENMASK(23, 16)
#define RGMU_OOB_IRQ_ERR_MSG_MASK GENMASK(31, 24)
#define RGMU_OOB_IRQ_MASK RGMU_OOB_IRQ_ERR_MSG_MASK
#define MAX_RGMU_CLKS 8
enum {
/* @RGMU_PRIV_FIRST_BOOT_DONE: The very first gpu boot is done */
RGMU_PRIV_FIRST_BOOT_DONE,
/* @RGMU_PRIV_GPU_STARTED: GPU has been started */
RGMU_PRIV_GPU_STARTED,
/* @RGMU_PRIV_PM_SUSPEND: The rgmu driver is suspended */
RGMU_PRIV_PM_SUSPEND,
};
/**
* struct a6xx_rgmu_device - rGMU device structure
* @ver: RGMU firmware version
* @pdev: Pointer to the RGMU platform device
* @rgmu_interrupt_num: RGMU interrupt number
* @oob_interrupt_num: Interrupt number for the RGMU asserted OOB interrupt
* @fw_hostptr: Buffer which holds the RGMU firmware
* @fw_size: Size of RGMU firmware buffer
* @clks: RGMU clocks including the GPU
* @gpu_clk: Pointer to GPU core clock
* @rgmu_clk: Pointer to rgmu clock
* @flags: RGMU flags
* @idle_level: Minimal GPU idle power level
* @fault_count: RGMU fault count
*/
struct a6xx_rgmu_device {
u32 ver;
struct platform_device *pdev;
unsigned int rgmu_interrupt_num;
unsigned int oob_interrupt_num;
unsigned int *fw_hostptr;
uint32_t fw_size;
struct clk_bulk_data *clks;
/** @num_clks: Number of clocks in @clks */
int num_clks;
struct clk *gpu_clk;
struct clk *rgmu_clk;
unsigned int idle_level;
unsigned int fault_count;
/** @flags: rgmu internal flags */
unsigned long flags;
/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
u32 num_oob_perfcntr;
};
/**
* a6xx_rgmu_device_probe - Probe a6xx rgmu resources
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target specific probe function for rgmu based a6xx targets.
*/
int a6xx_rgmu_device_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* a6xx_rgmu_reset - Reset and restart the rgmu
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int a6xx_rgmu_reset(struct adreno_device *adreno_dev);
/**
* a6xx_rgmu_snapshot - Take snapshot for rgmu based targets
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot
*
* This function halts RGMU execution if we hit an RGMU
* fault, and then takes the RGMU and GPU snapshot.
*/
void a6xx_rgmu_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* a6xx_rgmu_add_to_minidump - Register a6xx_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int a6xx_rgmu_add_to_minidump(struct adreno_device *adreno_dev);
/**
* a6xx_rgmu_gx_is_on() - Check if GX is on using pwr status register
* @adreno_dev: Pointer to the adreno device
*
* This check should only be performed if the keepalive bit is set or it
* can be guaranteed that the power state of the GPU will remain unchanged
*/
bool a6xx_rgmu_gx_is_on(struct adreno_device *adreno_dev);
#endif


@ -0,0 +1,578 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"
static int a6xx_rb_pagetable_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
struct kgsl_pagetable *pagetable, u32 *cmds)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
int count = 0;
u32 id = drawctxt ? drawctxt->base.id : 0;
if (pagetable == device->mmu.defaultpagetable)
return 0;
cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
cmds[count++] = id;
if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
cmds[count++] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
cmds[count++] = cp_type4_packet(A6XX_CP_MISC_CNTL, 1);
cmds[count++] = 1;
}
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
cmds[count++] = id;
if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
cmds[count++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
cmds[count++] = cp_type7_packet(CP_WAIT_FOR_ME, 0);
cmds[count++] = cp_type4_packet(A6XX_CP_MISC_CNTL, 1);
cmds[count++] = 0;
}
return count;
}
static int a6xx_rb_context_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_pagetable *pagetable =
adreno_drawctxt_get_pagetable(drawctxt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int count = 0;
u32 cmds[41];
if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
/* Clear performance counters during context switches */
if (!adreno_dev->perfcounter) {
cmds[count++] = cp_type4_packet(A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
cmds[count++] = 0x1;
}
count += a6xx_rb_pagetable_switch(adreno_dev, rb, drawctxt,
pagetable, &cmds[count]);
/* Wait for performance counter clear to finish */
if (!adreno_dev->perfcounter) {
cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
cmds[count++] = 0x3;
cmds[count++] = A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
cmds[count++] = 0x0;
cmds[count++] = 0x1;
cmds[count++] = 0x1;
cmds[count++] = 0x0;
}
}
cmds[count++] = cp_type7_packet(CP_NOP, 1);
cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[count++] = 0x31;
return a6xx_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
cmds, count, 0, NULL);
}
#define RB_SOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
#define RB_EOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
int a6xx_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time, bool sync)
{
struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int ret = 0;
unsigned long flags;
adreno_get_submit_time(adreno_dev, rb, time);
adreno_profile_submit_time(time);
if (sync && !ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
u32 *cmds = adreno_ringbuffer_allocspace(rb, 3);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
cmds[0] = cp_type7_packet(CP_WHERE_AM_I, 2);
cmds[1] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
rptr));
cmds[2] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device, rb->id,
rptr));
}
spin_lock_irqsave(&rb->preempt_lock, flags);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
if (adreno_dev->cur_rb == rb) {
kgsl_pwrscale_busy(device);
ret = a6xx_fenced_write(adreno_dev,
A6XX_CP_RB_WPTR, rb->_wptr,
FENCE_STATUS_WRITEDROPPED0_MASK);
rb->skip_inline_wptr = false;
}
} else {
if (adreno_dev->cur_rb == rb)
rb->skip_inline_wptr = true;
}
rb->wptr = rb->_wptr;
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (ret) {
/*
* If WPTR update fails, take inline snapshot and trigger
* recovery.
*/
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
}
return ret;
}
int a6xx_ringbuffer_init(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int i, ret;
ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
"scratch");
if (ret)
return ret;
adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
if (!adreno_preemption_feature_set(adreno_dev)) {
adreno_dev->num_ringbuffers = 1;
return adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[0], 0);
}
adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
int ret;
ret = adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[i], i);
if (ret)
return ret;
}
timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
a6xx_preemption_init(adreno_dev);
return 0;
}
#define A6XX_SUBMIT_MAX 79
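/*
 * A6XX_SUBMIT_MAX is a worst-case estimate, in dwords, of everything
 * a6xx_ringbuffer_addcmds() wraps around the caller's payload: the
 * identifier NOP, the IFPC markers, the preemption pre/post ambles and
 * the timestamp and event writes. Space is allocated for this worst case
 * up front and rb->_wptr is trimmed back by (size - index) for any
 * dwords that were not actually written.
 */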
int a6xx_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 size = A6XX_SUBMIT_MAX + dwords;
u32 *cmds, index = 0;
u64 profile_gpuaddr;
u32 profile_dwords;
if (adreno_drawctxt_detached(drawctxt))
return -ENOENT;
if (adreno_gpu_fault(adreno_dev) != 0)
return -EPROTO;
rb->timestamp++;
if (drawctxt)
drawctxt->internal_timestamp = rb->timestamp;
/*
* if APRIV is enabled we assume all submissions are run with protected
* mode off
*/
if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
flags &= ~F_NOTPROTECTED;
cmds = adreno_ringbuffer_allocspace(rb, size);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
/* Identify the start of a command */
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
/* This is 25 dwords when drawctxt is not NULL and perfcounter needs to be zapped */
index += a6xx_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
&cmds[index]);
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x101; /* IFPC disable */
profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (drawctxt) {
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
}
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
if (IS_SECURE(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 1;
}
if (IS_NOTPROTECTED(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 0;
}
memcpy(&cmds[index], in, dwords << 2);
index += dwords;
if (IS_NOTPROTECTED(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
cmds[index++] = 1;
}
profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
drawctxt, &dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
/*
* If this is an internal command, just write the ringbuffer timestamp,
* otherwise, write both
*/
if (!drawctxt) {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
} else {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_FLUSH_TS | (1 << 31);
cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_FLUSH_TS;
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
}
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x100; /* IFPC enable */
if (IS_WFI(flags))
cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
if (IS_SECURE(flags)) {
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 0;
}
/* 10 dwords */
index += a6xx_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
/* Adjust the write pointer for the number of dwords we actually wrote */
rb->_wptr -= (size - index);
return a6xx_ringbuffer_submit(rb, time,
!adreno_is_preemption_enabled(adreno_dev));
}
static u32 a6xx_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = A6XX_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
static u32 a6xx_get_alwayson_context(u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = A6XX_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
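/*
 * With 4K pages this gives PAGE_SIZE / 16 = 256 profiling IB slots; each
 * slot holds the four dwords written by a6xx_get_alwayson_counter() and
 * the buffer is reused as a ring via rb->profile_index.
 */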
static u64 a6xx_get_user_profiling_ib(struct adreno_ringbuffer *rb,
struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
{
u32 offset, *ib, dwords;
u64 gpuaddr;
if (IS_ERR(rb->profile_desc))
return 0;
offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
ib = rb->profile_desc->hostptr + offset;
gpuaddr = rb->profile_desc->gpuaddr + offset;
dwords = a6xx_get_alwayson_counter(ib,
cmdobj->profiling_buffer_gpuaddr + target_offset);
cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[1] = lower_32_bits(gpuaddr);
cmds[2] = upper_32_bits(gpuaddr);
cmds[3] = dwords;
rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
return 4;
}
static int a6xx_drawctxt_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int ret;
if (rb->drawctxt_active == drawctxt)
return 0;
if (kgsl_context_detached(&drawctxt->base))
return -ENOENT;
if (!_kgsl_context_get(&drawctxt->base))
return -ENOENT;
ret = a6xx_rb_context_switch(adreno_dev, rb, drawctxt);
if (ret) {
kgsl_context_put(&drawctxt->base);
return ret;
}
trace_adreno_drawctxt_switch(rb, drawctxt);
/* Release the current drawctxt as soon as the new one is switched */
adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
rb, rb->timestamp);
rb->drawctxt_active = drawctxt;
return 0;
}
#define A6XX_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
a6xx_get_user_profiling_ib((rb), (cmdobj), \
offsetof(struct kgsl_drawobj_profiling_buffer, field), \
(cmds))
#define A6XX_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
a6xx_get_alwayson_counter((cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define A6XX_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
a6xx_get_alwayson_context((cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define A6XX_COMMAND_DWORDS 40
int a6xx_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
struct adreno_ringbuffer *rb = drawctxt->rb;
int ret = 0, numibs = 0, index = 0;
u32 *cmds;
/* Count the number of IBs (if we are not skipping) */
if (!IS_SKIP(flags)) {
struct list_head *tmp;
list_for_each(tmp, &cmdobj->cmdlist)
numibs++;
}
cmds = kvmalloc((A6XX_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
if (!cmds) {
ret = -ENOMEM;
goto done;
}
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = START_IB_IDENTIFIER;
/* Kernel profiling: 8 dwords */
if (IS_KERNEL_PROFILE(flags)) {
index += A6XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
started);
index += A6XX_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
ctx_start);
}
/* User profiling: 4 dwords */
if (IS_USER_PROFILE(flags))
index += A6XX_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
gpu_ticks_submitted);
if (numibs) {
struct kgsl_memobj_node *ib;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x00d; /* IB1LIST start */
list_for_each_entry(ib, &cmdobj->cmdlist, node) {
if (ib->priv & MEMOBJ_SKIP ||
(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE
&& !IS_PREAMBLE(flags)))
cmds[index++] = cp_type7_packet(CP_NOP, 4);
cmds[index++] =
cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(ib->gpuaddr);
cmds[index++] = upper_32_bits(ib->gpuaddr);
/* Double check that IB_PRIV is never set */
cmds[index++] = (ib->size >> 2) & 0xfffff;
}
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x00e; /* IB1LIST end */
}
/* CCU invalidate */
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 24;
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 25;
/* 8 dwords */
if (IS_KERNEL_PROFILE(flags)) {
index += A6XX_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
retired);
index += A6XX_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
ctx_end);
}
/* 4 dwords */
if (IS_USER_PROFILE(flags))
index += A6XX_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
gpu_ticks_retired);
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = END_IB_IDENTIFIER;
ret = a6xx_drawctxt_switch(adreno_dev, rb, drawctxt);
/*
* In the unlikely event of an error in the drawctxt switch,
* treat it like a hang
*/
if (ret) {
/*
* It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
* the upper layers know how to handle it
*/
if (ret != -ENOSPC && ret != -ENOENT)
dev_err(device->dev,
"Unable to switch draw context: %d\n",
ret);
goto done;
}
adreno_drawobj_set_constraint(device, drawobj);
ret = a6xx_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
flags, cmds, index, drawobj->timestamp, time);
done:
trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
kvfree(cmds);
return ret;
}


@ -0,0 +1,515 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/types.h>
#include <soc/qcom/cmd-db.h>
#include <soc/qcom/tcs.h>
#include "adreno.h"
#include "adreno_a6xx.h"
#include "adreno_hfi.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"
struct rpmh_arc_vals {
u32 num;
const u16 *val;
};
struct bcm {
const char *name;
u32 buswidth;
u32 channels;
u32 unit;
u16 width;
u8 vcd;
bool fixed;
};
struct bcm_data {
__le32 unit;
__le16 width;
u8 vcd;
u8 reserved;
};
struct rpmh_bw_votes {
u32 wait_bitmask;
u32 num_cmds;
u32 *addrs;
u32 num_levels;
u32 **cmds;
};
#define ARC_VOTE_SET(pri, sec, vlvl) \
((((vlvl) & 0xFFFF) << 16) | (((sec) & 0xFF) << 8) | ((pri) & 0xFF))
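/*
 * Worked example of the encoding above (illustrative values only):
 * ARC_VOTE_SET(2, 1, 0x30) packs the primary rail index 2 into bits
 * [7:0], the secondary rail index 1 into bits [15:8] and the voltage
 * level 0x30 into bits [31:16], producing 0x00300102.
 */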
static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
{
size_t len = 0;
arc->val = cmd_db_read_aux_data(res_id, &len);
/*
* cmd_db_read_aux_data() gives us a zero-padded table of
* size len that contains the arc values. To determine the
* number of arc values, we loop through the table and count
* them until we get to the end of the buffer or hit the
* zero padding.
*/
for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0)
break;
}
return 0;
}
static int setup_volt_dependency_tbl(uint32_t *votes,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
u16 *vlvl, unsigned int num_entries)
{
int i, j, k;
uint16_t cur_vlvl;
bool found_match;
/* i tracks current KGSL GPU frequency table entry
* j tracks secondary rail voltage table entry
* k tracks primary rail voltage table entry
*/
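/*
 * Example with illustrative values: if vlvl[i] is 64 and pri_rail->val
 * is { 0, 48, 64, 80 }, the scan below picks k = 2 and cur_vlvl = 64,
 * i.e. the first primary rail corner at or above the requested level.
 */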
for (i = 0; i < num_entries; i++) {
found_match = false;
/* Look for a primary rail voltage that matches a VLVL level */
for (k = 0; k < pri_rail->num; k++) {
if (pri_rail->val[k] >= vlvl[i]) {
cur_vlvl = pri_rail->val[k];
found_match = true;
break;
}
}
/* If we did not find a matching VLVL level then abort */
if (!found_match)
return -EINVAL;
/*
* Look for a secondary rail index whose VLVL value
* is greater than or equal to the VLVL value of the
* corresponding index of the primary rail
*/
for (j = 0; j < sec_rail->num; j++) {
if (sec_rail->val[j] >= cur_vlvl ||
j + 1 == sec_rail->num)
break;
}
if (j == sec_rail->num)
j = 0;
votes[i] = ARC_VOTE_SET(k, j, cur_vlvl);
}
return 0;
}
/* Generate a set of bandwidth votes for the list of BCMs */
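/*
 * Worked example with purely illustrative numbers: for a BCM with
 * buswidth 4 (from the static table), and width 4 and unit 1000 read
 * from cmd-db, an ib request of 100000 KBps gives
 * peak = 100000 * 4 / 4 = 100000 and y = 100000 * 1000 / 1000 = 100000
 * BCM units, clamped to BCM_TCS_CMD_VOTE_MASK.
 */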
static void tcs_cmd_data(struct bcm *bcms, int count, u32 ab, u32 ib,
u32 *data)
{
int i;
for (i = 0; i < count; i++) {
bool valid = true;
bool commit = false;
u64 avg, peak, x, y;
if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
commit = true;
/*
* On a660, the "ACV" y vote should be 0x08 if there is a valid
* vote and 0x00 if not. This is kind of hacky and a660 specific
* but we can clean it up when we add a new target
*/
if (bcms[i].fixed) {
if (!ab && !ib)
data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
else
data[i] = BCM_TCS_CMD(commit, true, 0x0, 0x8);
continue;
}
/* Multiply the bandwidth by the width of the connection */
avg = ((u64) ab) * bcms[i].width;
/* And then divide by the total width across channels */
do_div(avg, bcms[i].buswidth * bcms[i].channels);
peak = ((u64) ib) * bcms[i].width;
do_div(peak, bcms[i].buswidth);
/* Input bandwidth value is in KBps */
x = avg * 1000ULL;
do_div(x, bcms[i].unit);
/* Input bandwidth value is in KBps */
y = peak * 1000ULL;
do_div(y, bcms[i].unit);
/*
* If a bandwidth value was specified but the calculation ends
* rounding down to zero, set a minimum level
*/
if (ab && x == 0)
x = 1;
if (ib && y == 0)
y = 1;
x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);
if (!x && !y)
valid = false;
data[i] = BCM_TCS_CMD(commit, valid, x, y);
}
}
static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
{
int i;
if (!votes)
return;
for (i = 0; votes->cmds && i < votes->num_levels; i++)
kfree(votes->cmds[i]);
kfree(votes->cmds);
kfree(votes->addrs);
kfree(votes);
}
/* Build the votes table from the specified bandwidth levels */
static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
int bcm_count, u32 *levels, int levels_count)
{
struct rpmh_bw_votes *votes;
int i;
votes = kzalloc(sizeof(*votes), GFP_KERNEL);
if (!votes)
return ERR_PTR(-ENOMEM);
votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL);
if (!votes->addrs) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
if (!votes->cmds) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
votes->num_cmds = bcm_count;
votes->num_levels = levels_count;
/* Get the cmd-db information for each BCM */
for (i = 0; i < bcm_count; i++) {
size_t l;
const struct bcm_data *data;
data = cmd_db_read_aux_data(bcms[i].name, &l);
votes->addrs[i] = cmd_db_read_addr(bcms[i].name);
bcms[i].unit = le32_to_cpu(data->unit);
bcms[i].width = le16_to_cpu(data->width);
bcms[i].vcd = data->vcd;
}
for (i = 0; i < bcm_count; i++) {
if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
votes->wait_bitmask |= (1 << i);
}
for (i = 0; i < levels_count; i++) {
votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
if (!votes->cmds[i]) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
tcs_cmd_data(bcms, bcm_count, 0, levels[i], votes->cmds[i]);
}
return votes;
}
/*
* setup_cx_arc_votes - Build the GMU CX voting table
* @adreno_dev: Pointer to adreno device
* @pri_rail: Pointer to primary power rail vlvl table
* @sec_rail: Pointer to second/dependent power rail vlvl table
* @freqs: List of GMU frequencies
* @vlvls: List of GMU voltage levels
*
* This function initializes the CX votes for all GMU frequencies
* for GMU DCVS.
*/
static int setup_cx_arc_votes(struct adreno_device *adreno_dev,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
u32 *freqs, u32 *vlvls)
{
/* Hardcoded values of GMU CX voltage levels */
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct a6xx_hfi *hfi = &gmu->hfi;
u16 gmu_cx_vlvl[MAX_CX_LEVELS];
u32 cx_votes[MAX_CX_LEVELS];
struct hfi_dcvstable_cmd *table = &hfi->dcvs_table;
int ret, i;
gmu_cx_vlvl[0] = 0;
gmu_cx_vlvl[1] = vlvls[0];
gmu_cx_vlvl[2] = vlvls[1];
table->gmu_level_num = 3;
table->cx_votes[0].freq = 0;
table->cx_votes[1].freq = freqs[0] / 1000;
table->cx_votes[2].freq = freqs[1] / 1000;
ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
sec_rail, gmu_cx_vlvl, table->gmu_level_num);
if (!ret) {
for (i = 0; i < table->gmu_level_num; i++)
table->cx_votes[i].vote = cx_votes[i];
}
return ret;
}
static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
{
u32 i;
/*
* This means that the Gx level doesn't have a dependency on Cx level.
* Return the same value to disable cx voting at GMU.
*/
if (vlvl == 0xffffffff) {
*hlvl = vlvl;
return 0;
}
for (i = 0; i < cx_rail->num; i++) {
if (cx_rail->val[i] >= vlvl) {
*hlvl = i;
return 0;
}
}
return -EINVAL;
}
/*
* setup_gx_arc_votes - Build the GPU DCVS voting table
* @adreno_dev: Pointer to adreno device
* @pri_rail: Pointer to primary power rail vlvl table
* @sec_rail: Pointer to second/dependent power rail vlvl table
* @cx_rail: Pointer to the CX power rail vlvl table
*
* This function initializes the GX votes for all GPU frequencies
* for GPU DCVS.
*/
static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
struct rpmh_arc_vals *cx_rail)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct hfi_dcvstable_cmd *table = &gmu->hfi.dcvs_table;
u32 index;
u16 vlvl_tbl[MAX_GX_LEVELS];
u32 gx_votes[MAX_GX_LEVELS];
int ret, i;
/* Add the zero powerlevel for the perf table */
table->gpu_level_num = device->pwrctrl.num_pwrlevels + 1;
if (table->gpu_level_num > pri_rail->num ||
table->gpu_level_num > ARRAY_SIZE(vlvl_tbl)) {
dev_err(&gmu->pdev->dev,
"Defined more GPU DCVS levels than RPMh can support\n");
return -ERANGE;
}
memset(vlvl_tbl, 0, sizeof(vlvl_tbl));
table->gx_votes[0].freq = 0;
table->gx_votes[0].cx_vote = 0;
/* Disable cx vote in gmu dcvs table if it is not supported in DT */
if (pwr->pwrlevels[0].cx_level == 0xffffffff)
table->gx_votes[0].cx_vote = 0xffffffff;
/* GMU power levels are in ascending order */
for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) {
u32 cx_vlvl = pwr->pwrlevels[i].cx_level;
vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level;
table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000;
ret = to_cx_hlvl(cx_rail, cx_vlvl,
&table->gx_votes[index].cx_vote);
if (ret) {
dev_err(&gmu->pdev->dev, "Unsupported cx corner: %u\n",
cx_vlvl);
return ret;
}
}
ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
sec_rail, vlvl_tbl, table->gpu_level_num);
if (!ret) {
for (i = 0; i < table->gpu_level_num; i++)
table->gx_votes[i].vote = gx_votes[i];
}
return ret;
}
static int build_dcvs_table(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct a6xx_hfi *hfi = &gmu->hfi;
struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
int ret;
ret = CMD_MSG_HDR(hfi->dcvs_table, H2F_MSG_PERF_TBL);
if (ret)
return ret;
ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
if (ret)
return ret;
ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
if (ret)
return ret;
ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
if (ret)
return ret;
ret = setup_cx_arc_votes(adreno_dev, &cx_arc, &mx_arc,
gmu->freqs, gmu->vlvls);
if (ret)
return ret;
return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);
}
/*
* List of Bus Control Modules (BCMs) that need to be configured for the GPU
* to access DDR. For each bus level we will generate a vote for each BCM.
*/
static struct bcm a660_ddr_bcms[] = {
{ .name = "SH0", .buswidth = 16 },
{ .name = "MC0", .buswidth = 4 },
{ .name = "ACV", .fixed = true },
};
/* Same as above, but for the CNOC BCMs */
static struct bcm a660_cnoc_bcms[] = {
{ .name = "CN0", .buswidth = 4 },
};
static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
{
u32 i, j;
cmd->bw_level_num = ddr->num_levels;
cmd->ddr_cmds_num = ddr->num_cmds;
cmd->ddr_wait_bitmask = ddr->wait_bitmask;
for (i = 0; i < ddr->num_cmds; i++)
cmd->ddr_cmd_addrs[i] = ddr->addrs[i];
for (i = 0; i < ddr->num_levels; i++)
for (j = 0; j < ddr->num_cmds; j++)
cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j];
if (!cnoc)
return;
cmd->cnoc_cmds_num = cnoc->num_cmds;
cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;
for (i = 0; i < cnoc->num_cmds; i++)
cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i];
for (i = 0; i < cnoc->num_levels; i++)
for (j = 0; j < cnoc->num_cmds; j++)
cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j];
}
static int build_bw_table(struct adreno_device *adreno_dev)
{
struct a6xx_gmu_device *gmu = to_a6xx_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct rpmh_bw_votes *ddr, *cnoc = NULL;
u32 *cnoc_table;
u32 count;
int ret;
ddr = build_rpmh_bw_votes(a660_ddr_bcms, ARRAY_SIZE(a660_ddr_bcms),
pwr->ddr_table, pwr->ddr_table_count);
if (IS_ERR(ddr))
return PTR_ERR(ddr);
cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
&count);
if (count > 0)
cnoc = build_rpmh_bw_votes(a660_cnoc_bcms,
ARRAY_SIZE(a660_cnoc_bcms), cnoc_table, count);
kfree(cnoc_table);
if (IS_ERR(cnoc)) {
free_rpmh_bw_votes(ddr);
return PTR_ERR(cnoc);
}
ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
if (ret)
return ret;
build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);
free_rpmh_bw_votes(ddr);
free_rpmh_bw_votes(cnoc);
return 0;
}
int a6xx_build_rpmh_tables(struct adreno_device *adreno_dev)
{
int ret;
ret = build_dcvs_table(adreno_dev);
if (ret)
return ret;
return build_bw_table(adreno_dev);
}

File diff suppressed because it is too large


@ -0,0 +1,193 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013-2020, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_compat.h"
#include "kgsl_compat.h"
int adreno_getproperty_compat(struct kgsl_device *device,
struct kgsl_device_getproperty *param)
{
int status = -EINVAL;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
switch (param->type) {
case KGSL_PROP_DEVICE_INFO:
{
struct kgsl_devinfo_compat devinfo;
if (param->sizebytes != sizeof(devinfo)) {
status = -EINVAL;
break;
}
memset(&devinfo, 0, sizeof(devinfo));
devinfo.device_id = device->id + 1;
devinfo.chip_id = adreno_dev->chipid;
devinfo.mmu_enabled =
kgsl_mmu_has_feature(device, KGSL_MMU_PAGED);
devinfo.gmem_gpubaseaddr = 0;
devinfo.gmem_sizebytes =
adreno_dev->gpucore->gmem_size;
if (copy_to_user(param->value, &devinfo,
sizeof(devinfo))) {
status = -EFAULT;
break;
}
status = 0;
}
break;
case KGSL_PROP_DEVICE_SHADOW:
{
struct kgsl_shadowprop_compat shadowprop;
if (param->sizebytes != sizeof(shadowprop)) {
status = -EINVAL;
break;
}
memset(&shadowprop, 0, sizeof(shadowprop));
if (device->memstore->hostptr) {
/* Give a token address to identify memstore */
shadowprop.gpuaddr = (unsigned int)
KGSL_MEMSTORE_TOKEN_ADDRESS;
shadowprop.size =
(unsigned int) device->memstore->size;
/*
* GSL needs this to be set, even if it
* appears to be meaningless
*/
shadowprop.flags = KGSL_FLAGS_INITIALIZED |
KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
}
if (copy_to_user(param->value, &shadowprop,
sizeof(shadowprop))) {
status = -EFAULT;
break;
}
status = 0;
}
break;
default:
status = device->ftbl->getproperty(device, param);
}
return status;
}
int adreno_setproperty_compat(struct kgsl_device_private *dev_priv,
unsigned int type,
void __user *value,
unsigned int sizebytes)
{
int status = -EINVAL;
struct kgsl_device *device = dev_priv->device;
switch (type) {
case KGSL_PROP_PWR_CONSTRAINT:
case KGSL_PROP_L3_PWR_CONSTRAINT: {
struct kgsl_device_constraint_compat constraint32;
struct kgsl_device_constraint constraint;
struct kgsl_context *context;
if (sizebytes != sizeof(constraint32))
break;
if (copy_from_user(&constraint32, value,
sizeof(constraint32))) {
status = -EFAULT;
break;
}
/* Populate the real constraint type from the compat */
constraint.type = constraint32.type;
constraint.context_id = constraint32.context_id;
constraint.data = compat_ptr(constraint32.data);
constraint.size = (size_t)constraint32.size;
context = kgsl_context_get_owner(dev_priv,
constraint.context_id);
if (context == NULL)
break;
status = adreno_set_constraint(device, context,
&constraint);
kgsl_context_put(context);
}
break;
default:
/*
* Call adreno_setproperty in case the property type was
* KGSL_PROP_PWRCTRL
*/
status = device->ftbl->setproperty(dev_priv, type, value,
sizebytes);
}
return status;
}
static long adreno_ioctl_perfcounter_query_compat(
struct kgsl_device_private *dev_priv, unsigned int cmd,
void *data)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_perfcounter_query_compat *query32 = data;
struct kgsl_perfcounter_query query;
long result;
query.groupid = query32->groupid;
query.countables = compat_ptr(query32->countables);
query.count = query32->count;
query.max_counters = query32->max_counters;
result = adreno_perfcounter_query_group(adreno_dev,
query.groupid, query.countables,
query.count, &query.max_counters);
query32->max_counters = query.max_counters;
return result;
}
static long adreno_ioctl_perfcounter_read_compat(
struct kgsl_device_private *dev_priv, unsigned int cmd,
void *data)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_perfcounter_read_compat *read32 = data;
struct kgsl_perfcounter_read read;
/*
* When performance counter zapping is enabled, the counters are cleared
* across context switches. Reading the counters when they are zapped is
* not permitted.
*/
if (!adreno_dev->perfcounter)
return -EPERM;
read.reads = (struct kgsl_perfcounter_read_group __user *)
(uintptr_t)read32->reads;
read.count = read32->count;
return adreno_perfcounter_read_group(adreno_dev, read.reads,
read.count);
}
static struct kgsl_ioctl adreno_compat_ioctl_funcs[] = {
{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
{ IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put },
{ IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT,
adreno_ioctl_perfcounter_query_compat },
{ IOCTL_KGSL_PERFCOUNTER_READ_COMPAT,
adreno_ioctl_perfcounter_read_compat },
};
long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, unsigned long arg)
{
return adreno_ioctl_helper(dev_priv, cmd, arg,
adreno_compat_ioctl_funcs,
ARRAY_SIZE(adreno_compat_ioctl_funcs));
}

View File

@ -0,0 +1,46 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2013-2015, 2017, 2019 The Linux Foundation. All rights reserved.
*/
#ifndef __ADRENO_COMPAT_H
#define __ADRENO_COMPAT_H
#ifdef CONFIG_COMPAT
struct kgsl_device;
struct kgsl_device_private;
int adreno_getproperty_compat(struct kgsl_device *device,
struct kgsl_device_getproperty *param);
int adreno_setproperty_compat(struct kgsl_device_private *dev_priv,
unsigned int type,
void __user *value,
unsigned int sizebytes);
long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, unsigned long arg);
#else
static inline int adreno_getproperty_compat(struct kgsl_device *device,
struct kgsl_device_getproperty *param)
{
return -EINVAL;
}
static inline int adreno_setproperty_compat(struct kgsl_device_private
*dev_priv, unsigned int type,
void __user *value, unsigned int sizebytes)
{
return -EINVAL;
}
static inline long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, unsigned long arg)
{
return -EINVAL;
}
#endif /* CONFIG_COMPAT */
#endif /* __ADRENO_COMPAT_H */

View File

@ -0,0 +1,333 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013-2020, The Linux Foundation. All rights reserved.
* Copyright (c) 2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/of.h>
#include <linux/of_platform.h>
#include "adreno.h"
#define TO_ADRENO_CORESIGHT_ATTR(_attr) \
container_of(_attr, struct adreno_coresight_attr, attr)
ssize_t adreno_coresight_show_register(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(dev);
struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr);
struct kgsl_device *device = adreno_csdev->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
unsigned int val = 0;
mutex_lock(&device->mutex);
/*
* Return the current value of the register if coresight is enabled,
* otherwise report 0
*/
if (!adreno_csdev->enabled)
goto out;
if (!adreno_active_count_get(adreno_dev)) {
kgsl_regread(device, cattr->reg->offset, &cattr->reg->value);
adreno_active_count_put(adreno_dev);
}
val = cattr->reg->value;
out:
mutex_unlock(&device->mutex);
return scnprintf(buf, PAGE_SIZE, "0x%X\n", val);
}
ssize_t adreno_coresight_store_register(struct device *dev,
struct device_attribute *attr, const char *buf, size_t size)
{
struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr);
struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(dev);
struct kgsl_device *device = adreno_csdev->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
unsigned long val;
int ret;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
mutex_lock(&device->mutex);
/* Ignore writes while coresight is off */
if (!adreno_csdev->enabled)
goto out;
cattr->reg->value = val;
if (!adreno_active_count_get(adreno_dev)) {
kgsl_regwrite(device, cattr->reg->offset, cattr->reg->value);
adreno_active_count_put(adreno_dev);
}
out:
mutex_unlock(&device->mutex);
return size;
}
/*
 * Generic function to disable the coresight debug bus on Adreno devices by
 * clearing the GPU specific coresight registers.
*/
static void adreno_coresight_disable(struct coresight_device *csdev,
struct perf_event *event)
{
struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev);
struct kgsl_device *device = adreno_csdev->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
const struct adreno_coresight *coresight = adreno_csdev->coresight;
int i;
mutex_lock(&device->mutex);
if (!adreno_csdev->enabled) {
mutex_unlock(&device->mutex);
return;
}
if (!adreno_active_count_get(adreno_dev)) {
for (i = 0; i < coresight->count; i++)
kgsl_regwrite(device,
coresight->registers[i].offset, 0);
adreno_active_count_put(adreno_dev);
}
adreno_csdev->enabled = false;
mutex_unlock(&device->mutex);
}
static void _adreno_coresight_get_and_clear(struct adreno_device *adreno_dev,
struct adreno_coresight_device *adreno_csdev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_coresight *coresight = adreno_csdev->coresight;
int i;
if (IS_ERR_OR_NULL(adreno_csdev->dev) || !adreno_csdev->enabled)
return;
kgsl_pre_hwaccess(device);
/*
* Save the current value of each coresight register
* and then clear each register
*/
for (i = 0; i < coresight->count; i++) {
kgsl_regread(device, coresight->registers[i].offset,
&coresight->registers[i].value);
kgsl_regwrite(device, coresight->registers[i].offset, 0);
}
}
static void _adreno_coresight_set(struct adreno_device *adreno_dev,
struct adreno_coresight_device *adreno_csdev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_coresight *coresight = adreno_csdev->coresight;
int i;
if (IS_ERR_OR_NULL(adreno_csdev->dev) || !adreno_csdev->enabled)
return;
for (i = 0; i < coresight->count; i++)
kgsl_regwrite(device, coresight->registers[i].offset,
coresight->registers[i].value);
}
/* Generic function to enable coresight debug bus on adreno devices */
static int adreno_coresight_enable(struct coresight_device *csdev,
struct perf_event *event, u32 mode)
{
struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev);
const struct adreno_coresight *coresight = adreno_csdev->coresight;
struct kgsl_device *device = adreno_csdev->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
int ret = 0;
mutex_lock(&device->mutex);
if (!adreno_csdev->enabled) {
int i;
adreno_csdev->enabled = true;
/* Reset all the debug registers to their default values */
for (i = 0; i < coresight->count; i++)
coresight->registers[i].value =
coresight->registers[i].initial;
ret = adreno_active_count_get(adreno_dev);
if (!ret) {
_adreno_coresight_set(adreno_dev, adreno_csdev);
adreno_active_count_put(adreno_dev);
}
}
mutex_unlock(&device->mutex);
return ret;
}
void adreno_coresight_stop(struct adreno_device *adreno_dev)
{
_adreno_coresight_get_and_clear(adreno_dev, &adreno_dev->gx_coresight);
_adreno_coresight_get_and_clear(adreno_dev, &adreno_dev->cx_coresight);
}
void adreno_coresight_start(struct adreno_device *adreno_dev)
{
_adreno_coresight_set(adreno_dev, &adreno_dev->gx_coresight);
_adreno_coresight_set(adreno_dev, &adreno_dev->cx_coresight);
}
static int adreno_coresight_trace_id(struct coresight_device *csdev)
{
struct adreno_coresight_device *adreno_csdev = dev_get_drvdata(&csdev->dev);
return adreno_csdev->atid;
}
static const struct coresight_ops_source adreno_coresight_source_ops = {
.trace_id = adreno_coresight_trace_id,
.enable = adreno_coresight_enable,
.disable = adreno_coresight_disable,
};
static const struct coresight_ops adreno_coresight_ops = {
.source_ops = &adreno_coresight_source_ops,
};
void adreno_coresight_remove(struct adreno_device *adreno_dev)
{
if (!IS_ERR_OR_NULL(adreno_dev->gx_coresight.dev))
coresight_unregister(adreno_dev->gx_coresight.dev);
if (!IS_ERR_OR_NULL(adreno_dev->cx_coresight.dev))
coresight_unregister(adreno_dev->cx_coresight.dev);
}
static int funnel_gfx_enable(struct coresight_device *csdev, int inport,
int outport)
{
struct kgsl_device *device = kgsl_get_device(0);
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
int ret;
if (!device)
return -ENODEV;
mutex_lock(&device->mutex);
ret = adreno_active_count_get(adreno_dev);
if (ret)
goto err;
	/* Now that the GPU is up, call into the coresight driver to enable the funnel */
ret = adreno_dev->funnel_gfx.funnel_ops->link_ops->enable(csdev, inport, outport);
adreno_active_count_put(adreno_dev);
err:
mutex_unlock(&device->mutex);
return ret;
}
static void funnel_gfx_disable(struct coresight_device *csdev, int inport,
int outport)
{
struct kgsl_device *device = kgsl_get_device(0);
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
int ret;
if (!device)
return;
mutex_lock(&device->mutex);
ret = adreno_active_count_get(adreno_dev);
if (ret)
goto err;
	/* Now that the GPU is up, call into the coresight driver to disable the funnel */
adreno_dev->funnel_gfx.funnel_ops->link_ops->disable(csdev, inport, outport);
adreno_active_count_put(adreno_dev);
err:
mutex_unlock(&device->mutex);
}
struct coresight_ops_link funnel_link_gfx_ops = {
.enable = funnel_gfx_enable,
.disable = funnel_gfx_disable,
};
struct coresight_ops funnel_gfx_ops = {
.link_ops = &funnel_link_gfx_ops,
};
static void adreno_coresight_dev_probe(struct kgsl_device *device,
const struct adreno_coresight *coresight,
struct adreno_coresight_device *adreno_csdev,
struct device_node *node)
{
struct platform_device *pdev = of_find_device_by_node(node);
struct coresight_desc desc;
u32 atid;
if (!pdev)
return;
if (of_property_read_u32(node, "coresight-atid", &atid))
return;
if (of_property_read_string(node, "coresight-name", &desc.name))
return;
desc.pdata = coresight_get_platform_data(&pdev->dev);
platform_device_put(pdev);
if (IS_ERR(desc.pdata))
return;
desc.type = CORESIGHT_DEV_TYPE_SOURCE;
desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
desc.ops = &adreno_coresight_ops;
desc.dev = &device->pdev->dev;
desc.groups = coresight->groups;
adreno_csdev->atid = atid;
adreno_csdev->device = device;
adreno_csdev->dev = coresight_register(&desc);
adreno_csdev->coresight = coresight;
if (!IS_ERR_OR_NULL(adreno_csdev->dev))
dev_set_drvdata(&adreno_csdev->dev->dev, adreno_csdev);
}
void adreno_coresight_add_device(struct adreno_device *adreno_dev, const char *name,
const struct adreno_coresight *coresight,
struct adreno_coresight_device *adreno_csdev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct device_node *node = of_find_compatible_node(device->pdev->dev.of_node, NULL, name);
struct adreno_funnel_device *funnel_gfx = &adreno_dev->funnel_gfx;
if (!node)
return;
/* Set the funnel ops as graphics ops to bring GPU up before enabling funnel */
if ((funnel_gfx != NULL) && (funnel_gfx->funnel_csdev != NULL)
&& (funnel_gfx->funnel_csdev->ops == NULL))
funnel_gfx->funnel_csdev->ops = &funnel_gfx_ops;
adreno_coresight_dev_probe(device, coresight, adreno_csdev, node);
of_node_put(node);
}

View File

@ -0,0 +1,164 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_CORESIGHT_H_
#define _ADRENO_CORESIGHT_H_
#include <linux/device.h>
#include <linux/coresight.h>
struct adreno_device;
/**
* struct adreno_coresight_register - Definition for a coresight (tracebus)
* debug register
*/
struct adreno_coresight_register {
/** @offset: Offset of the debug register in the KGSL register space */
unsigned int offset;
/** @initial: Default value to write when coresight is enabled */
unsigned int initial;
/**
* @value: Current shadow value of the register (to be reprogrammed
* after power collapse)
*/
unsigned int value;
};
/**
 * struct adreno_coresight_attr - Local attribute struct for coresight sysfs
 * files
 */
struct adreno_coresight_attr {
/** @attr: Base device attribute */
struct device_attribute attr;
/**
* @reg: Pointer to the &struct adreno_coresight_register definition
* for this register
*/
struct adreno_coresight_register *reg;
};
/**
* adreno_coresight_show_register - Callback function for sysfs show
* @device: Pointer to a device handle
* @attr: Pointer to the device attribute
* @buf: Contains the output buffer for sysfs
*
* Callback function to write the value of the register into the sysfs node.
* Return: The size of the data written to the buffer or negative on error.
*/
ssize_t adreno_coresight_show_register(struct device *device,
struct device_attribute *attr, char *buf);
/**
 * adreno_coresight_store_register - Callback function for sysfs store
 * @dev: Pointer to a device handle
* @attr: Pointer to the device attribute
* @buf: Contains the input buffer for sysfs
* @size: Size of the data stored in buf
*
 * Callback function to store a value written to the sysfs node into the register.
* Return: The size of the data consumed or negative on error.
*/
ssize_t adreno_coresight_store_register(struct device *dev,
struct device_attribute *attr, const char *buf, size_t size);
#define ADRENO_CORESIGHT_ATTR(_attrname, _reg) \
struct adreno_coresight_attr coresight_attr_##_attrname = { \
__ATTR(_attrname, 0644, \
adreno_coresight_show_register, \
adreno_coresight_store_register), \
(_reg), }
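/*
 * Usage sketch (illustrative only): a GPU specific source file would normally
 * pair an adreno_coresight_register definition with an ADRENO_CORESIGHT_ATTR()
 * declaration so that the register is exposed as a sysfs file. The register
 * name and offset below are hypothetical placeholders, not real register
 * definitions:
 *
 *	static struct adreno_coresight_register coresight_reg_dbgc_cfg = {
 *		.offset = 0x600,
 *		.initial = 0x1,
 *	};
 *
 *	static ADRENO_CORESIGHT_ATTR(dbgc_cfg, &coresight_reg_dbgc_cfg);
 *
 * The resulting &coresight_attr_dbgc_cfg.attr.attr entry would then be listed
 * in the attribute_group array referenced by the groups field of
 * struct adreno_coresight.
 */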
/**
* struct adreno_coresight - GPU specific coresight definition
*/
struct adreno_coresight {
/**
* @registers: Array of GPU specific registers to configure trace
* bus output
*/
struct adreno_coresight_register *registers;
/** @count: Number of registers in the array */
unsigned int count;
/** @groups: Pointer to an attribute list of control files */
const struct attribute_group **groups;
};
/**
* struct adreno_coresight_device - Container for a coresight instance
*/
struct adreno_coresight_device {
	/** @dev: Pointer to the coresight device */
struct coresight_device *dev;
	/** @coresight: Pointer to the GPU specific coresight definition */
const struct adreno_coresight *coresight;
/** @device: Pointer to a GPU device handle */
struct kgsl_device *device;
/** @enabled: True if the coresight instance is enabled */
bool enabled;
/** @atid: The unique ATID value of the coresight device */
unsigned int atid;
};
/**
* struct adreno_funnel_device - Container for a coresight gfx funnel
*/
struct adreno_funnel_device {
/** @funnel_dev: Pointer to the gfx funnel device */
struct device *funnel_dev;
	/** @funnel_csdev: Pointer to the gfx funnel coresight definition */
struct coresight_device *funnel_csdev;
/** @funnel_ops: Function pointers to enable/disable the coresight funnel */
const struct coresight_ops *funnel_ops;
};
#ifdef CONFIG_QCOM_KGSL_CORESIGHT
void adreno_coresight_add_device(struct adreno_device *adreno_dev,
const char *name,
const struct adreno_coresight *coresight,
struct adreno_coresight_device *adreno_csdev);
/**
* adreno_coresight_start - Reprogram coresight registers after power collapse
* @adreno_dev: An Adreno GPU device handle
*
* Reprogram the cached values to the coresight registers on power up.
*/
void adreno_coresight_start(struct adreno_device *adreno_dev);
/**
 * adreno_coresight_stop - Save and clear coresight registers before power collapse
* @adreno_dev: An Adreno GPU device handle
*
* Cache the current coresight register values so they can be restored after
* power collapse.
*/
void adreno_coresight_stop(struct adreno_device *adreno_dev);
/**
* adreno_coresight_remove - Destroy active coresight devices
* @adreno_dev: An Adreno GPU device handle
*
* Destroy any active coresight devices.
*/
void adreno_coresight_remove(struct adreno_device *adreno_dev);
#else
static inline void adreno_coresight_add_device(struct kgsl_device *device,
const char *name,
const struct adreno_coresight *coresight,
struct adreno_coresight_device *adreno_csdev)
{
}
static inline void adreno_coresight_start(struct adreno_device *adreno_dev) { }
static inline void adreno_coresight_stop(struct adreno_device *adreno_dev) { }
static inline void adreno_coresight_remove(struct adreno_device *adreno_dev) { }
#endif
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,175 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2013-2014, 2017, 2019, The Linux Foundation. All rights reserved.
*/
#ifndef __ADRENO_IB_PARSER__
#define __ADRENO_IB_PARSER__
#include "adreno.h"
extern const unsigned int a3xx_cp_addr_regs[];
extern const unsigned int a4xx_cp_addr_regs[];
/*
* struct adreno_ib_object - Structure containing information about an
* address range found in an IB
* @gpuaddr: The starting gpuaddress of the range
* @size: Size of the range
 * @snapshot_obj_type: Type of range used in snapshot
* @entry: The memory entry in which this range is found
*/
struct adreno_ib_object {
uint64_t gpuaddr;
uint64_t size;
int snapshot_obj_type;
struct kgsl_mem_entry *entry;
};
/*
* struct adreno_ib_object_list - List of address ranges found in IB
* @obj_list: The address range list
* @num_objs: Number of objects in list
*/
struct adreno_ib_object_list {
struct adreno_ib_object *obj_list;
int num_objs;
};
/*
 * Adreno registers used during IB parsing; these contain the addresses
 * and sizes of the buffers that are present in an IB
*/
enum adreno_cp_addr_regs {
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0 = 0,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6,
ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7,
ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30,
ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31,
ADRENO_CP_ADDR_VSC_SIZE_ADDRESS,
ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR,
ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR,
ADRENO_CP_ADDR_SP_VS_OBJ_START_REG,
ADRENO_CP_ADDR_SP_FS_OBJ_START_REG,
ADRENO_CP_UCHE_INVALIDATE0,
ADRENO_CP_UCHE_INVALIDATE1,
ADRENO_CP_ADDR_MAX,
};
/*
 * adreno_ib_init_ib_obj() - Initialize an IB object with an address range
 * @gpuaddr: GPU address with which to initialize the object
 * @size: Size in bytes of the address range
 * @obj_type: The object type used by snapshot
 * @entry: The memory entry in which this range was found
 * @ib_obj: Pointer to the IB object to initialize
*/
static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr,
uint64_t size, int obj_type,
struct kgsl_mem_entry *entry,
struct adreno_ib_object *ib_obj)
{
ib_obj->gpuaddr = gpuaddr;
ib_obj->size = size;
ib_obj->snapshot_obj_type = obj_type;
ib_obj->entry = entry;
}
/*
* adreno_cp_parser_getreg() - Returns the value of register offset
* @adreno_dev: The adreno device being operated upon
* @reg_enum: Enum index of the register whose offset is returned
*/
static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev,
enum adreno_cp_addr_regs reg_enum)
{
if (reg_enum == ADRENO_CP_ADDR_MAX)
return -EEXIST;
if (!adreno_is_a3xx(adreno_dev))
return -EEXIST;
return a3xx_cp_addr_regs[reg_enum];
}
/*
* adreno_cp_parser_regindex() - Returns enum index for a given register offset
* @adreno_dev: The adreno device being operated upon
* @offset: Register offset
* @start: The start index to search from
* @end: The last index to search
*
* Checks the list of registers defined for the device and returns the index
* whose offset value matches offset parameter.
*/
static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev,
unsigned int offset,
enum adreno_cp_addr_regs start,
enum adreno_cp_addr_regs end)
{
int i;
const unsigned int *regs;
if (!adreno_is_a3xx(adreno_dev))
return -EEXIST;
regs = a3xx_cp_addr_regs;
for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++)
if (regs[i] == offset)
return i;
return -EEXIST;
}
int adreno_ib_create_object_list(
struct kgsl_device *device,
struct kgsl_process_private *process,
uint64_t gpuaddr, uint64_t dwords, uint64_t ib2base,
struct adreno_ib_object_list **out_ib_obj_list);
void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list);
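/*
 * Usage sketch (illustrative only): a caller typically builds the object list
 * for an IB, walks the discovered ranges and then releases the list. The
 * "device", "process", "ibaddr" and "dwords" variables below are placeholders
 * assumed to be supplied by the caller:
 *
 *	struct adreno_ib_object_list *ib_obj_list;
 *	int i, ret;
 *
 *	ret = adreno_ib_create_object_list(device, process, ibaddr, dwords,
 *			0, &ib_obj_list);
 *	if (!ret) {
 *		for (i = 0; i < ib_obj_list->num_objs; i++) {
 *			struct adreno_ib_object *obj = &ib_obj_list->obj_list[i];
 *
 *			(process obj->gpuaddr and obj->size here)
 *		}
 *		adreno_ib_destroy_obj_list(ib_obj_list);
 *	}
 */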
#endif

View File

@ -0,0 +1,680 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2002,2008-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/debugfs.h>
#include "adreno.h"
extern struct dentry *kgsl_debugfs_dir;
static void set_isdb(struct adreno_device *adreno_dev, void *priv)
{
set_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
}
static int _isdb_set(void *data, u64 val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
	/* Once ISDB is enabled it stays enabled */
if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv))
return 0;
/*
* Bring down the GPU so we can bring it back up with the correct power
* and clock settings
*/
return adreno_power_cycle(adreno_dev, set_isdb, NULL);
}
static int _isdb_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
*val = (u64) test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n");
static int _ctxt_record_size_set(void *data, u64 val)
{
struct kgsl_device *device = data;
device->snapshot_ctxt_record_size = val;
return 0;
}
static int _ctxt_record_size_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
*val = device->snapshot_ctxt_record_size;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_ctxt_record_size_fops, _ctxt_record_size_get,
_ctxt_record_size_set, "%llu\n");
static int _lm_limit_set(void *data, u64 val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
return 0;
	/* Ensure the value is between 3A and 10A (specified in mA) */
if (val > 10000)
val = 10000;
else if (val < 3000)
val = 3000;
if (adreno_dev->lm_enabled)
return adreno_power_cycle_u32(adreno_dev,
&adreno_dev->lm_limit, val);
return 0;
}
static int _lm_limit_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
*val = 0;
*val = (u64) adreno_dev->lm_limit;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_lm_limit_fops, _lm_limit_get,
_lm_limit_set, "%llu\n");
static int _lm_threshold_count_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
*val = 0;
else
*val = (u64) adreno_dev->lm_threshold_cross;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_lm_threshold_fops, _lm_threshold_count_get,
NULL, "%llu\n");
static int _active_count_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
unsigned int i = atomic_read(&device->active_cnt);
*val = (u64) i;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_active_count_fops, _active_count_get, NULL, "%llu\n");
static int _coop_reset_set(void *data, u64 val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
if (ADRENO_FEATURE(adreno_dev, ADRENO_COOP_RESET))
adreno_dev->cooperative_reset = val ? true : false;
return 0;
}
static int _coop_reset_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
*val = (u64) adreno_dev->cooperative_reset;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_coop_reset_fops, _coop_reset_get,
_coop_reset_set, "%llu\n");
static void set_gpu_client_pf(struct adreno_device *adreno_dev, void *priv)
{
adreno_dev->uche_client_pf = *((u32 *)priv);
adreno_dev->patch_reglist = false;
}
static int _gpu_client_pf_set(void *data, u64 val)
{
struct kgsl_device *device = data;
return adreno_power_cycle(ADRENO_DEVICE(device), set_gpu_client_pf, &val);
}
static int _gpu_client_pf_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
*val = (u64) adreno_dev->uche_client_pf;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_gpu_client_pf_fops, _gpu_client_pf_get,
_gpu_client_pf_set, "%llu\n");
typedef void (*reg_read_init_t)(struct kgsl_device *device);
typedef void (*reg_read_fill_t)(struct kgsl_device *device, int i,
unsigned int *vals, int linec);
static void sync_event_print(struct seq_file *s,
struct kgsl_drawobj_sync_event *sync_event)
{
switch (sync_event->type) {
case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: {
seq_printf(s, "sync: ctx: %u ts: %u",
sync_event->context->id, sync_event->timestamp);
break;
}
case KGSL_CMD_SYNCPOINT_TYPE_FENCE: {
int i;
struct event_fence_info *info = sync_event->priv;
for (i = 0; info && i < info->num_fences; i++)
seq_printf(s, "sync: %s",
info->fences[i].name);
break;
}
case KGSL_CMD_SYNCPOINT_TYPE_TIMELINE: {
int j;
struct event_timeline_info *info = sync_event->priv;
for (j = 0; info && info[j].timeline; j++)
seq_printf(s, "timeline: %d seqno: %lld",
info[j].timeline, info[j].seqno);
break;
}
default:
seq_printf(s, "sync: type: %d", sync_event->type);
break;
}
}
struct flag_entry {
unsigned long mask;
const char *str;
};
static void _print_flags(struct seq_file *s, const struct flag_entry *table,
unsigned long flags)
{
int i;
int first = 1;
for (i = 0; table[i].str; i++) {
if (flags & table[i].mask) {
seq_printf(s, "%c%s", first ? '\0' : '|', table[i].str);
flags &= ~(table[i].mask);
first = 0;
}
}
if (flags) {
seq_printf(s, "%c0x%lx", first ? '\0' : '|', flags);
first = 0;
}
if (first)
seq_puts(s, "None");
}
#define print_flags(_s, _flag, _array...) \
({ \
const struct flag_entry symbols[] = \
{ _array, { -1, NULL } }; \
_print_flags(_s, symbols, _flag); \
})
static void syncobj_print(struct seq_file *s,
struct kgsl_drawobj_sync *syncobj)
{
struct kgsl_drawobj_sync_event *event;
unsigned int i;
seq_puts(s, " syncobj ");
for (i = 0; i < syncobj->numsyncs; i++) {
event = &syncobj->synclist[i];
if (!kgsl_drawobj_event_pending(syncobj, i))
continue;
sync_event_print(s, event);
seq_puts(s, "\n");
}
}
static void cmdobj_print(struct seq_file *s,
struct kgsl_drawobj_cmd *cmdobj)
{
struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
if (drawobj->type == CMDOBJ_TYPE)
seq_puts(s, " cmdobj ");
else
seq_puts(s, " markerobj ");
seq_printf(s, "\t %u ", drawobj->timestamp);
seq_puts(s, " priv: ");
print_flags(s, cmdobj->priv,
{ BIT(CMDOBJ_SKIP), "skip"},
{ BIT(CMDOBJ_FORCE_PREAMBLE), "force_preamble"},
{ BIT(CMDOBJ_WFI), "wait_for_idle" });
}
static void drawobj_print(struct seq_file *s,
struct kgsl_drawobj *drawobj)
{
if (!kref_get_unless_zero(&drawobj->refcount))
return;
if (drawobj->type == SYNCOBJ_TYPE)
syncobj_print(s, SYNCOBJ(drawobj));
else if ((drawobj->type == CMDOBJ_TYPE) ||
(drawobj->type == MARKEROBJ_TYPE))
cmdobj_print(s, CMDOBJ(drawobj));
seq_puts(s, " flags: ");
print_flags(s, drawobj->flags, KGSL_DRAWOBJ_FLAGS);
kgsl_drawobj_put(drawobj);
seq_puts(s, "\n");
}
static int ctx_print(struct seq_file *s, void *unused)
{
struct adreno_context *drawctxt = s->private;
unsigned int i;
struct kgsl_event *event;
unsigned int queued = 0, consumed = 0, retired = 0;
seq_printf(s, "id: %u type: %s priority: %d process: %s (%d) tid: %d\n",
drawctxt->base.id,
kgsl_context_type(drawctxt->type),
drawctxt->base.priority,
drawctxt->base.proc_priv->comm,
pid_nr(drawctxt->base.proc_priv->pid),
drawctxt->base.tid);
seq_puts(s, "flags: ");
print_flags(s, drawctxt->base.flags & ~(KGSL_CONTEXT_PRIORITY_MASK
| KGSL_CONTEXT_TYPE_MASK), KGSL_CONTEXT_FLAGS);
seq_puts(s, " priv: ");
print_flags(s, drawctxt->base.priv,
{ BIT(KGSL_CONTEXT_PRIV_SUBMITTED), "submitted"},
{ BIT(KGSL_CONTEXT_PRIV_DETACHED), "detached"},
{ BIT(KGSL_CONTEXT_PRIV_INVALID), "invalid"},
{ BIT(KGSL_CONTEXT_PRIV_PAGEFAULT), "pagefault"},
{ BIT(ADRENO_CONTEXT_FAULT), "fault"},
{ BIT(ADRENO_CONTEXT_GPU_HANG), "gpu_hang"},
{ BIT(ADRENO_CONTEXT_GPU_HANG_FT), "gpu_hang_ft"},
{ BIT(ADRENO_CONTEXT_SKIP_EOF), "skip_end_of_frame" },
{ BIT(ADRENO_CONTEXT_FORCE_PREAMBLE), "force_preamble"});
seq_puts(s, "\n");
seq_puts(s, "timestamps: ");
kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
KGSL_TIMESTAMP_QUEUED, &queued);
kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
KGSL_TIMESTAMP_CONSUMED, &consumed);
kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
KGSL_TIMESTAMP_RETIRED, &retired);
seq_printf(s, "queued: %u consumed: %u retired: %u global:%u\n",
queued, consumed, retired,
drawctxt->internal_timestamp);
seq_puts(s, "drawqueue:\n");
spin_lock(&drawctxt->lock);
for (i = drawctxt->drawqueue_head;
i != drawctxt->drawqueue_tail;
i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE))
drawobj_print(s, drawctxt->drawqueue[i]);
spin_unlock(&drawctxt->lock);
seq_puts(s, "events:\n");
spin_lock(&drawctxt->base.events.lock);
list_for_each_entry(event, &drawctxt->base.events.events, node)
seq_printf(s, "\t%d: %pS created: %u\n", event->timestamp,
event->func, event->created);
spin_unlock(&drawctxt->base.events.lock);
return 0;
}
static int ctx_open(struct inode *inode, struct file *file)
{
int ret;
struct adreno_context *ctx = inode->i_private;
if (!_kgsl_context_get(&ctx->base))
return -ENODEV;
ret = single_open(file, ctx_print, &ctx->base);
if (ret)
kgsl_context_put(&ctx->base);
return ret;
}
static int ctx_release(struct inode *inode, struct file *file)
{
struct kgsl_context *context;
context = ((struct seq_file *)file->private_data)->private;
kgsl_context_put(context);
return single_release(inode, file);
}
static const struct file_operations ctx_fops = {
.open = ctx_open,
.read = seq_read,
.llseek = seq_lseek,
.release = ctx_release,
};
void
adreno_context_debugfs_init(struct adreno_device *adreno_dev,
struct adreno_context *ctx)
{
unsigned char name[16];
/*
* Get the context here to make sure it still exists for the life of the
* file
*/
_kgsl_context_get(&ctx->base);
snprintf(name, sizeof(name), "%d", ctx->base.id);
ctx->debug_root = debugfs_create_file(name, 0444,
adreno_dev->ctx_d_debugfs, ctx, &ctx_fops);
}
static int _bcl_sid0_set(void *data, u64 val)
{
struct kgsl_device *device = data;
const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
if (ops && ops->bcl_sid_set)
return ops->bcl_sid_set(device, 0, val);
return 0;
}
static int _bcl_sid0_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
if (ops && ops->bcl_sid_get)
*val = ops->bcl_sid_get(device, 0);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_sid0_fops, _bcl_sid0_get, _bcl_sid0_set, "%llu\n");
static int _bcl_sid1_set(void *data, u64 val)
{
struct kgsl_device *device = data;
const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
if (ops && ops->bcl_sid_set)
return ops->bcl_sid_set(device, 1, val);
return 0;
}
static int _bcl_sid1_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
if (ops && ops->bcl_sid_get)
*val = ops->bcl_sid_get(device, 1);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_sid1_fops, _bcl_sid1_get, _bcl_sid1_set, "%llu\n");
static int _bcl_sid2_set(void *data, u64 val)
{
struct kgsl_device *device = data;
const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
if (ops && ops->bcl_sid_set)
return ops->bcl_sid_set(device, 2, val);
return 0;
}
static int _bcl_sid2_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
const struct gmu_dev_ops *ops = GMU_DEVICE_OPS(device);
if (ops && ops->bcl_sid_get)
*val = ops->bcl_sid_get(device, 2);
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_sid2_fops, _bcl_sid2_get, _bcl_sid2_set, "%llu\n");
static int _bcl_throttle_time_us_get(void *data, u64 *val)
{
struct kgsl_device *device = data;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
if (!ADRENO_FEATURE(adreno_dev, ADRENO_BCL))
*val = 0;
else
*val = (u64) adreno_dev->bcl_throttle_time_us;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(_bcl_throttle_fops, _bcl_throttle_time_us_get, NULL, "%llu\n");
static int _skipsaverestore_store(void *data, u64 val)
{
struct adreno_device *adreno_dev = data;
if (adreno_dev->hwsched_enabled)
return adreno_power_cycle_bool(adreno_dev,
&adreno_dev->preempt.skipsaverestore, val);
adreno_dev->preempt.skipsaverestore = val ? true : false;
return 0;
}
static int _skipsaverestore_show(void *data, u64 *val)
{
struct adreno_device *adreno_dev = data;
*val = (u64) adreno_dev->preempt.skipsaverestore;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(skipsaverestore_fops, _skipsaverestore_show, _skipsaverestore_store,
"%llu\n");
static int _usesgmem_store(void *data, u64 val)
{
struct adreno_device *adreno_dev = data;
if (adreno_dev->hwsched_enabled)
return adreno_power_cycle_bool(adreno_dev,
&adreno_dev->preempt.usesgmem, val);
adreno_dev->preempt.usesgmem = val ? true : false;
return 0;
}
static int _usesgmem_show(void *data, u64 *val)
{
struct adreno_device *adreno_dev = data;
*val = (u64) adreno_dev->preempt.usesgmem;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(usesgmem_fops, _usesgmem_show, _usesgmem_store, "%llu\n");
static int _preempt_level_store(void *data, u64 val)
{
struct adreno_device *adreno_dev = data;
if (adreno_dev->hwsched_enabled)
return adreno_power_cycle_u32(adreno_dev,
&adreno_dev->preempt.preempt_level,
min_t(u64, val, 2));
adreno_dev->preempt.preempt_level = min_t(u64, val, 2);
return 0;
}
static int _preempt_level_show(void *data, u64 *val)
{
struct adreno_device *adreno_dev = data;
*val = (u64) adreno_dev->preempt.preempt_level;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(preempt_level_fops, _preempt_level_show, _preempt_level_store, "%llu\n");
static int _warmboot_show(void *data, u64 *val)
{
struct adreno_device *adreno_dev = data;
*val = (u64)adreno_dev->warmboot_enabled;
return 0;
}
/*
 * When the warmboot feature is enabled from debugfs, the first slumber exit will be a cold boot
 * and all HFI messages will be recorded so that a warm boot can happen on subsequent slumber
 * exits. When the warmboot feature is disabled from debugfs, every slumber exit will be a cold
 * boot. A usage sketch follows the warmboot_fops definition below.
*/
static int _warmboot_store(void *data, u64 val)
{
struct adreno_device *adreno_dev = data;
if (adreno_dev->warmboot_enabled == val)
return 0;
return adreno_power_cycle_bool(adreno_dev, &adreno_dev->warmboot_enabled, val);
}
DEFINE_DEBUGFS_ATTRIBUTE(warmboot_fops, _warmboot_show, _warmboot_store, "%llu\n");
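/*
 * Usage sketch (assuming the standard kgsl debugfs layout, typically
 * /sys/kernel/debug/kgsl/kgsl-3d0): writing the node power cycles the GPU so
 * that the new boot mode takes effect, e.g.
 *
 *	echo 1 > /sys/kernel/debug/kgsl/kgsl-3d0/warmboot	(record HFI, warm boot later)
 *	echo 0 > /sys/kernel/debug/kgsl/kgsl-3d0/warmboot	(cold boot on every slumber exit)
 */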
static int _ifpc_hyst_store(void *data, u64 val)
{
struct adreno_device *adreno_dev = data;
u32 hyst;
if (!gmu_core_dev_ifpc_isenabled(KGSL_DEVICE(adreno_dev)))
return -EINVAL;
/* IFPC hysteresis timer is 16 bits */
hyst = max_t(u32, (u32) (FIELD_GET(GENMASK(15, 0), val)),
adreno_dev->ifpc_hyst_floor);
if (hyst == adreno_dev->ifpc_hyst)
return 0;
return adreno_power_cycle_u32(adreno_dev,
&adreno_dev->ifpc_hyst, hyst);
}
static int _ifpc_hyst_show(void *data, u64 *val)
{
struct adreno_device *adreno_dev = data;
*val = (u64) adreno_dev->ifpc_hyst;
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(ifpc_hyst_fops, _ifpc_hyst_show, _ifpc_hyst_store, "%llu\n");
void adreno_debugfs_init(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct dentry *snapshot_dir;
if (IS_ERR_OR_NULL(device->d_debugfs))
return;
debugfs_create_file("active_cnt", 0444, device->d_debugfs, device,
&_active_count_fops);
adreno_dev->ctx_d_debugfs = debugfs_create_dir("ctx",
device->d_debugfs);
snapshot_dir = debugfs_lookup("snapshot", kgsl_debugfs_dir);
if (!IS_ERR_OR_NULL(snapshot_dir))
debugfs_create_file("coop_reset", 0644, snapshot_dir, device,
&_coop_reset_fops);
if (ADRENO_FEATURE(adreno_dev, ADRENO_LM)) {
debugfs_create_file("lm_limit", 0644, device->d_debugfs, device,
&_lm_limit_fops);
debugfs_create_file("lm_threshold_count", 0444,
device->d_debugfs, device, &_lm_threshold_fops);
}
if (adreno_is_a5xx(adreno_dev))
debugfs_create_file("isdb", 0644, device->d_debugfs,
device, &_isdb_fops);
if (gmu_core_isenabled(device))
debugfs_create_file("ifpc_hyst", 0644, device->d_debugfs,
device, &ifpc_hyst_fops);
if (ADRENO_FEATURE(adreno_dev, ADRENO_GMU_WARMBOOT))
debugfs_create_file("warmboot", 0644, device->d_debugfs,
device, &warmboot_fops);
debugfs_create_file("ctxt_record_size", 0644, snapshot_dir,
device, &_ctxt_record_size_fops);
debugfs_create_file("gpu_client_pf", 0644, snapshot_dir,
device, &_gpu_client_pf_fops);
debugfs_create_bool("dump_all_ibs", 0644, snapshot_dir,
&device->dump_all_ibs);
adreno_dev->bcl_debugfs_dir = debugfs_create_dir("bcl", device->d_debugfs);
if (!IS_ERR_OR_NULL(adreno_dev->bcl_debugfs_dir)) {
debugfs_create_file("sid0", 0644, adreno_dev->bcl_debugfs_dir, device, &_sid0_fops);
debugfs_create_file("sid1", 0644, adreno_dev->bcl_debugfs_dir, device, &_sid1_fops);
debugfs_create_file("sid2", 0644, adreno_dev->bcl_debugfs_dir, device, &_sid2_fops);
debugfs_create_file("bcl_throttle_time_us", 0444, adreno_dev->bcl_debugfs_dir,
device, &_bcl_throttle_fops);
}
adreno_dev->preemption_debugfs_dir = debugfs_create_dir("preemption", device->d_debugfs);
if (!IS_ERR_OR_NULL(adreno_dev->preemption_debugfs_dir)) {
debugfs_create_file("preempt_level", 0644, adreno_dev->preemption_debugfs_dir,
device, &preempt_level_fops);
debugfs_create_file("usesgmem", 0644, adreno_dev->preemption_debugfs_dir, device,
&usesgmem_fops);
debugfs_create_file("skipsaverestore", 0644, adreno_dev->preemption_debugfs_dir,
device, &skipsaverestore_fops);
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,112 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
*/
#ifndef ____ADRENO_DISPATCHER_H
#define ____ADRENO_DISPATCHER_H
#include <linux/kobject.h>
#include <linux/kthread.h>
#include <linux/llist.h>
extern unsigned int adreno_drawobj_timeout;
/*
* Maximum size of the dispatcher ringbuffer - the actual inflight size will be
* smaller then this but this size will allow for a larger range of inflight
* sizes that can be chosen at runtime
*/
#define ADRENO_DISPATCH_DRAWQUEUE_SIZE 128
#define DRAWQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
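/*
 * DRAWQUEUE_NEXT() treats the drawqueue as a circular buffer. For example,
 * with the queue size of 128 defined above (values shown are illustrative):
 *
 *	DRAWQUEUE_NEXT(0, ADRENO_DISPATCH_DRAWQUEUE_SIZE)   == 1
 *	DRAWQUEUE_NEXT(126, ADRENO_DISPATCH_DRAWQUEUE_SIZE) == 127
 *	DRAWQUEUE_NEXT(127, ADRENO_DISPATCH_DRAWQUEUE_SIZE) == 0
 *
 * so advancing head/tail with this macro never indexes past the cmd_q array.
 */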
/**
* struct adreno_dispatcher_drawqueue - List of commands for a RB level
 * @cmd_q: List of command objects submitted to the dispatcher
 * @inflight: Number of commands inflight in this queue
 * @head: Head pointer of the queue
 * @tail: Tail pointer of the queue
 * @active_context_count: Number of active contexts seen in this RB drawqueue
 * @expires: The jiffies value at which this drawqueue has run too long
*/
struct adreno_dispatcher_drawqueue {
struct kgsl_drawobj_cmd *cmd_q[ADRENO_DISPATCH_DRAWQUEUE_SIZE];
unsigned int inflight;
unsigned int head;
unsigned int tail;
int active_context_count;
unsigned long expires;
};
/**
* struct adreno_dispatch_job - An instance of work for the dispatcher
* @node: llist node for the list of jobs
* @drawctxt: A pointer to an adreno draw context
*
* This struct defines work for the dispatcher. When a drawctxt is ready to send
 * commands it will attach itself to the appropriate list for its priority.
* The dispatcher will process all jobs on each priority every time it goes
* through a dispatch cycle
*/
struct adreno_dispatch_job {
struct llist_node node;
struct adreno_context *drawctxt;
};
/**
* struct adreno_dispatcher - container for the adreno GPU dispatcher
 * @mutex: Mutex to protect the structure
 * @priv: Private flags describing the dispatcher state
 * @timer: Timer to monitor the progress of the drawobjs
 * @fault_timer: Timer used for fault detection
 * @inflight: Number of drawobj operations pending in the ringbuffer
 * @fault: Non-zero if a fault was detected
 * @work: kthread_work used to run the dispatcher on a kthread worker
 * @kobj: kobject for the dispatcher directory in the device sysfs node
 * @idle_gate: Gate to wait on for dispatcher to idle
 * @worker: The kthread worker that runs the dispatcher work
*/
struct adreno_dispatcher {
struct mutex mutex;
unsigned long priv;
struct timer_list timer;
struct timer_list fault_timer;
unsigned int inflight;
atomic_t fault;
/** @jobs - Array of dispatch job lists for each priority level */
struct llist_head jobs[16];
/** @requeue - Array of lists for dispatch jobs that got requeued */
struct llist_head requeue[16];
struct kthread_work work;
struct kobject kobj;
struct completion idle_gate;
struct kthread_worker *worker;
};
enum adreno_dispatcher_flags {
ADRENO_DISPATCHER_POWER = 0,
ADRENO_DISPATCHER_ACTIVE,
ADRENO_DISPATCHER_INIT,
};
struct adreno_device;
struct kgsl_device;
void adreno_dispatcher_start(struct kgsl_device *device);
int adreno_dispatcher_init(struct adreno_device *adreno_dev);
int adreno_dispatcher_idle(struct adreno_device *adreno_dev);
void adreno_dispatcher_stop(struct adreno_device *adreno_dev);
void adreno_dispatcher_start_fault_timer(struct adreno_device *adreno_dev);
void adreno_dispatcher_stop_fault_timer(struct kgsl_device *device);
void adreno_dispatcher_schedule(struct kgsl_device *device);
/**
* adreno_dispatcher_fault - Set dispatcher fault to request recovery
* @adreno_dev: A handle to adreno device
* @fault: The type of fault
*/
void adreno_dispatcher_fault(struct adreno_device *adreno_dev, u32 fault);
#endif /* ____ADRENO_DISPATCHER_H */

View File

@ -0,0 +1,678 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/debugfs.h>
#include "adreno.h"
#include "adreno_trace.h"
static void wait_callback(struct kgsl_device *device,
struct kgsl_event_group *group, void *priv, int result)
{
struct adreno_context *drawctxt = priv;
wake_up_all(&drawctxt->waiting);
}
static int _check_context_timestamp(struct kgsl_device *device,
struct kgsl_context *context, unsigned int timestamp)
{
/* Bail if the drawctxt has been invalidated or destroyed */
if (kgsl_context_is_bad(context))
return 1;
return kgsl_check_timestamp(device, context, timestamp);
}
/**
* adreno_drawctxt_dump() - dump information about a draw context
* @device: KGSL device that owns the context
* @context: KGSL context to dump information about
*
* Dump specific information about the context to the kernel log. Used for
* fence timeout callbacks
*/
void adreno_drawctxt_dump(struct kgsl_device *device,
struct kgsl_context *context)
{
unsigned int queue, start, retire;
struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
int index, pos;
char buf[120];
kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queue);
kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, &start);
kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retire);
/*
* We may have kgsl sync obj timer running, which also uses same
* lock, take a lock with software interrupt disabled (bh)
* to avoid spin lock recursion.
*
	 * Use spin_trylock because the dispatcher can acquire drawctxt->lock
* if context is pending and the fence it is waiting on just got
* signalled. Dispatcher acquires drawctxt->lock and tries to
* delete the sync obj timer using del_timer_sync().
* del_timer_sync() waits till timer and its pending handlers
* are deleted. But if the timer expires at the same time,
* timer handler could be waiting on drawctxt->lock leading to a
* deadlock. To prevent this use spin_trylock_bh.
*/
if (!spin_trylock_bh(&drawctxt->lock)) {
dev_err(device->dev, " context[%u]: could not get lock\n",
context->id);
return;
}
dev_err(device->dev,
" context[%u]: queue=%u, submit=%u, start=%u, retire=%u\n",
context->id, queue, drawctxt->submitted_timestamp,
start, retire);
if (drawctxt->drawqueue_head != drawctxt->drawqueue_tail) {
struct kgsl_drawobj *drawobj =
drawctxt->drawqueue[drawctxt->drawqueue_head];
if (test_bit(ADRENO_CONTEXT_FENCE_LOG, &context->priv)) {
dev_err(device->dev,
" possible deadlock. Context %u might be blocked for itself\n",
context->id);
goto stats;
}
if (!kref_get_unless_zero(&drawobj->refcount))
goto stats;
if (drawobj->type == SYNCOBJ_TYPE) {
struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
if (kgsl_drawobj_events_pending(syncobj)) {
dev_err(device->dev,
" context[%u] (ts=%u) Active sync points:\n",
context->id, drawobj->timestamp);
kgsl_dump_syncpoints(device, syncobj);
}
}
kgsl_drawobj_put(drawobj);
}
stats:
memset(buf, 0, sizeof(buf));
pos = 0;
for (index = 0; index < SUBMIT_RETIRE_TICKS_SIZE; index++) {
uint64_t msecs;
unsigned int usecs;
if (!drawctxt->submit_retire_ticks[index])
continue;
msecs = drawctxt->submit_retire_ticks[index] * 10;
usecs = do_div(msecs, 192);
usecs = do_div(msecs, 1000);
pos += scnprintf(buf + pos, sizeof(buf) - pos, "%u.%0u ",
(unsigned int)msecs, usecs);
}
dev_err(device->dev, " context[%u]: submit times: %s\n",
context->id, buf);
spin_unlock_bh(&drawctxt->lock);
}
/**
* adreno_drawctxt_wait() - sleep until a timestamp expires
* @adreno_dev: pointer to the adreno_device struct
 * @context: Pointer to the KGSL context to wait on
 * @timestamp: Timestamp to wait on
* @timeout: Number of jiffies to wait (0 for infinite)
*
* Register an event to wait for a timestamp on a context and sleep until it
* has past. Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0
* on success
*/
int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
struct kgsl_context *context,
uint32_t timestamp, unsigned int timeout)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
int ret;
long ret_temp;
if (kgsl_context_detached(context))
return -ENOENT;
if (kgsl_context_invalid(context))
return -EDEADLK;
trace_adreno_drawctxt_wait_start(-1, context->id, timestamp);
ret = kgsl_add_event(device, &context->events, timestamp,
wait_callback, (void *) drawctxt);
if (ret)
goto done;
/*
* If timeout is 0, wait forever. msecs_to_jiffies will force
* values larger than INT_MAX to an infinite timeout.
*/
if (timeout == 0)
timeout = UINT_MAX;
ret_temp = wait_event_interruptible_timeout(drawctxt->waiting,
_check_context_timestamp(device, context, timestamp),
msecs_to_jiffies(timeout));
if (ret_temp <= 0) {
kgsl_cancel_event(device, &context->events, timestamp,
wait_callback, (void *)drawctxt);
ret = ret_temp ? (int)ret_temp : -ETIMEDOUT;
goto done;
}
ret = 0;
/* -EDEADLK if the context was invalidated while we were waiting */
if (kgsl_context_invalid(context))
ret = -EDEADLK;
/* Return -EINVAL if the context was detached while we were waiting */
if (kgsl_context_detached(context))
ret = -ENOENT;
done:
trace_adreno_drawctxt_wait_done(-1, context->id, timestamp, ret);
return ret;
}
/**
* adreno_drawctxt_wait_rb() - Wait for the last RB timestamp at which this
* context submitted a command to the corresponding RB
* @adreno_dev: The device on which the timestamp is active
 * @context: The context which submitted commands to the RB
* @timestamp: The RB timestamp of last command submitted to RB by context
* @timeout: Timeout value for the wait
* Caller must hold the device mutex
*/
static int adreno_drawctxt_wait_rb(struct adreno_device *adreno_dev,
struct kgsl_context *context,
uint32_t timestamp, unsigned int timeout)
{
struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
int ret = 0;
/*
	 * If the context is invalid or has not submitted commands to the GPU,
* then return immediately - we may end up waiting for a timestamp
* that will never come
*/
if (kgsl_context_invalid(context) ||
!test_bit(KGSL_CONTEXT_PRIV_SUBMITTED, &context->priv))
goto done;
trace_adreno_drawctxt_wait_start(drawctxt->rb->id, context->id,
timestamp);
ret = adreno_ringbuffer_waittimestamp(drawctxt->rb, timestamp, timeout);
done:
trace_adreno_drawctxt_wait_done(drawctxt->rb->id, context->id,
timestamp, ret);
return ret;
}
static int drawctxt_detach_drawobjs(struct adreno_context *drawctxt,
struct kgsl_drawobj **list)
{
int count = 0;
while (drawctxt->drawqueue_head != drawctxt->drawqueue_tail) {
struct kgsl_drawobj *drawobj =
drawctxt->drawqueue[drawctxt->drawqueue_head];
drawctxt->drawqueue_head = (drawctxt->drawqueue_head + 1) %
ADRENO_CONTEXT_DRAWQUEUE_SIZE;
list[count++] = drawobj;
}
return count;
}
/**
* adreno_drawctxt_invalidate() - Invalidate an adreno draw context
* @device: Pointer to the KGSL device structure for the GPU
* @context: Pointer to the KGSL context structure
*
* Invalidate the context and remove all queued commands and cancel any pending
* waiters
*/
void adreno_drawctxt_invalidate(struct kgsl_device *device,
struct kgsl_context *context)
{
struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
struct kgsl_drawobj *list[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
int i, count;
trace_adreno_drawctxt_invalidate(drawctxt);
spin_lock(&drawctxt->lock);
set_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv);
/*
* set the timestamp to the last value since the context is invalidated
* and we want the pending events for this context to go away
*/
kgsl_sharedmem_writel(device->memstore,
KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
drawctxt->timestamp);
kgsl_sharedmem_writel(device->memstore,
KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
drawctxt->timestamp);
/* Get rid of commands still waiting in the queue */
count = drawctxt_detach_drawobjs(drawctxt, list);
spin_unlock(&drawctxt->lock);
for (i = 0; i < count; i++) {
kgsl_cancel_events_timestamp(device, &context->events,
list[i]->timestamp);
kgsl_drawobj_destroy(list[i]);
}
/* Make sure all pending events are processed or cancelled */
kgsl_flush_event_group(device, &context->events);
/* Give the bad news to everybody waiting around */
wake_up_all(&drawctxt->waiting);
wake_up_all(&drawctxt->wq);
wake_up_all(&drawctxt->timeout);
}
void adreno_drawctxt_set_guilty(struct kgsl_device *device,
struct kgsl_context *context)
{
if (!context)
return;
context->reset_status = KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
adreno_drawctxt_invalidate(device, context);
}
#define KGSL_CONTEXT_PRIORITY_MED 0x8
/**
* adreno_drawctxt_create - create a new adreno draw context
* @dev_priv: the owner of the context
* @flags: flags for the context (passed from user space)
*
* Create and return a new draw context for the 3D core.
*/
struct kgsl_context *
adreno_drawctxt_create(struct kgsl_device_private *dev_priv,
uint32_t *flags)
{
struct adreno_context *drawctxt;
struct kgsl_device *device = dev_priv->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
const struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
int ret;
unsigned int local;
local = *flags & (KGSL_CONTEXT_PREAMBLE |
KGSL_CONTEXT_NO_GMEM_ALLOC |
KGSL_CONTEXT_PER_CONTEXT_TS |
KGSL_CONTEXT_USER_GENERATED_TS |
KGSL_CONTEXT_NO_FAULT_TOLERANCE |
KGSL_CONTEXT_INVALIDATE_ON_FAULT |
KGSL_CONTEXT_CTX_SWITCH |
KGSL_CONTEXT_PRIORITY_MASK |
KGSL_CONTEXT_TYPE_MASK |
KGSL_CONTEXT_PWR_CONSTRAINT |
KGSL_CONTEXT_IFH_NOP |
KGSL_CONTEXT_SECURE |
KGSL_CONTEXT_PREEMPT_STYLE_MASK |
KGSL_CONTEXT_LPAC |
KGSL_CONTEXT_NO_SNAPSHOT |
KGSL_CONTEXT_FAULT_INFO);
/* Check for errors before trying to initialize */
/* If preemption is not supported, ignore preemption request */
if (!adreno_preemption_feature_set(adreno_dev))
local &= ~KGSL_CONTEXT_PREEMPT_STYLE_MASK;
/* We no longer support legacy context switching */
if ((local & KGSL_CONTEXT_PREAMBLE) == 0 ||
(local & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
dev_err_once(device->dev,
"legacy context switch not supported\n");
return ERR_PTR(-EINVAL);
}
/* Make sure that our target can support secure contexts if requested */
if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) &&
(local & KGSL_CONTEXT_SECURE)) {
dev_err_once(device->dev, "Secure context not supported\n");
return ERR_PTR(-EOPNOTSUPP);
}
if ((local & KGSL_CONTEXT_LPAC) &&
(!(adreno_dev->lpac_enabled))) {
dev_err_once(device->dev, "LPAC context not supported\n");
return ERR_PTR(-EOPNOTSUPP);
}
if ((local & KGSL_CONTEXT_LPAC) && (local & KGSL_CONTEXT_SECURE)) {
dev_err_once(device->dev, "LPAC secure context not supported\n");
return ERR_PTR(-EOPNOTSUPP);
}
drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
if (drawctxt == NULL)
return ERR_PTR(-ENOMEM);
drawctxt->timestamp = 0;
drawctxt->base.flags = local;
/* Always enable per-context timestamps */
drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
drawctxt->type = (drawctxt->base.flags & KGSL_CONTEXT_TYPE_MASK)
>> KGSL_CONTEXT_TYPE_SHIFT;
spin_lock_init(&drawctxt->lock);
init_waitqueue_head(&drawctxt->wq);
init_waitqueue_head(&drawctxt->waiting);
init_waitqueue_head(&drawctxt->timeout);
/* If the priority is not set by user, set it for them */
if ((drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) ==
KGSL_CONTEXT_PRIORITY_UNDEF)
drawctxt->base.flags |= (KGSL_CONTEXT_PRIORITY_MED <<
KGSL_CONTEXT_PRIORITY_SHIFT);
/* Store the context priority */
drawctxt->base.priority =
(drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) >>
KGSL_CONTEXT_PRIORITY_SHIFT;
/*
* Now initialize the common part of the context. This allocates the
* context id, and then possibly another thread could look it up.
* So we want all of our initialization that doesn't require the context
* id to be done before this call.
*/
ret = kgsl_context_init(dev_priv, &drawctxt->base);
if (ret != 0) {
kfree(drawctxt);
return ERR_PTR(ret);
}
kgsl_sharedmem_writel(device->memstore,
KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
0);
kgsl_sharedmem_writel(device->memstore,
KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
0);
adreno_context_debugfs_init(ADRENO_DEVICE(device), drawctxt);
INIT_LIST_HEAD(&drawctxt->active_node);
INIT_LIST_HEAD(&drawctxt->hw_fence_list);
INIT_LIST_HEAD(&drawctxt->hw_fence_inflight_list);
if (adreno_dev->dispatch_ops && adreno_dev->dispatch_ops->setup_context)
adreno_dev->dispatch_ops->setup_context(adreno_dev, drawctxt);
if (gpudev->preemption_context_init) {
ret = gpudev->preemption_context_init(&drawctxt->base);
if (ret != 0) {
kgsl_context_detach(&drawctxt->base);
return ERR_PTR(ret);
}
}
/* copy back whatever flags we decided were valid */
*flags = drawctxt->base.flags;
return &drawctxt->base;
}
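/*
 * Helper used during context detach: wait for the context's last submitted
 * global (internal) timestamp to retire on its ringbuffer, and escalate to
 * GPU fault recovery if the wait times out. Takes the device mutex internally.
 */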
static void wait_for_timestamp_rb(struct kgsl_device *device,
struct adreno_context *drawctxt)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_context *context = &drawctxt->base;
int ret;
/*
* internal_timestamp is set in adreno_ringbuffer_addcmds,
* which holds the device mutex.
*/
mutex_lock(&device->mutex);
/*
 * Wait for the last global timestamp to pass before continuing.
 * The maximum wait time is 30s. Some large IBs can take longer than
 * 10s to retire, and if a hang happens the time for the context's
 * commands to retire will be greater than 10s, so 30s should be
 * sufficient time to wait for the commands even if a hang happens.
 */
ret = adreno_drawctxt_wait_rb(adreno_dev, &drawctxt->base,
drawctxt->internal_timestamp, 30 * 1000);
/*
 * If the wait for the global timestamp fails due to a timeout, mark it
 * as a context detach timeout fault and schedule the dispatcher to kick
 * in GPU recovery. For an ADRENO_CTX_DETATCH_TIMEOUT_FAULT we clear
 * the policy and invalidate the context. If -EAGAIN is returned, then
 * recovery will kick in and there will be no more commands in the RB
 * pipe from this context, which is what we are waiting for, so ignore
 * the -EAGAIN error.
 */
if (ret && ret != -EAGAIN) {
dev_err(device->dev,
"Wait for global ctx=%u ts=%u type=%d error=%d\n",
drawctxt->base.id, drawctxt->internal_timestamp,
drawctxt->type, ret);
adreno_set_gpu_fault(adreno_dev,
ADRENO_CTX_DETATCH_TIMEOUT_FAULT);
mutex_unlock(&device->mutex);
/* Schedule dispatcher to kick in recovery */
adreno_dispatcher_schedule(device);
/* Wait for context to be invalidated and release context */
wait_event_interruptible_timeout(drawctxt->timeout,
kgsl_context_invalid(&drawctxt->base),
msecs_to_jiffies(5000));
return;
}
kgsl_sharedmem_writel(device->memstore,
KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
drawctxt->timestamp);
kgsl_sharedmem_writel(device->memstore,
KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
drawctxt->timestamp);
adreno_profile_process_results(adreno_dev);
mutex_unlock(&device->mutex);
}
void adreno_drawctxt_detach(struct kgsl_context *context)
{
struct kgsl_device *device;
struct adreno_device *adreno_dev;
const struct adreno_gpudev *gpudev;
struct adreno_context *drawctxt;
int count, i;
struct kgsl_drawobj *list[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
if (context == NULL)
return;
device = context->device;
adreno_dev = ADRENO_DEVICE(device);
gpudev = ADRENO_GPU_DEVICE(adreno_dev);
drawctxt = ADRENO_CONTEXT(context);
spin_lock(&drawctxt->lock);
spin_lock(&adreno_dev->active_list_lock);
list_del_init(&drawctxt->active_node);
spin_unlock(&adreno_dev->active_list_lock);
count = drawctxt_detach_drawobjs(drawctxt, list);
spin_unlock(&drawctxt->lock);
for (i = 0; i < count; i++) {
/*
* If the context is detached while we are waiting for
* the next command in GFT SKIP CMD, print the context
* detached status here.
*/
adreno_fault_skipcmd_detached(adreno_dev, drawctxt, list[i]);
kgsl_drawobj_destroy(list[i]);
}
debugfs_remove_recursive(drawctxt->debug_root);
/* The debugfs file has a reference, release it */
if (drawctxt->debug_root)
kgsl_context_put(context);
if (gpudev->context_detach)
gpudev->context_detach(drawctxt);
else
wait_for_timestamp_rb(device, drawctxt);
if (context->user_ctxt_record) {
gpumem_free_entry(context->user_ctxt_record);
/* Put the extra ref from gpumem_alloc_entry() */
kgsl_mem_entry_put(context->user_ctxt_record);
}
/* wake threads waiting to submit commands from this context */
wake_up_all(&drawctxt->waiting);
wake_up_all(&drawctxt->wq);
}
void adreno_drawctxt_destroy(struct kgsl_context *context)
{
struct adreno_context *drawctxt;
struct adreno_device *adreno_dev;
const struct adreno_gpudev *gpudev;
if (context == NULL)
return;
drawctxt = ADRENO_CONTEXT(context);
adreno_dev = ADRENO_DEVICE(context->device);
gpudev = ADRENO_GPU_DEVICE(adreno_dev);
if (gpudev->context_destroy)
gpudev->context_destroy(adreno_dev, drawctxt);
kfree(drawctxt);
}
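/*
 * Ringbuffer timestamp event callback: drop the drawctxt reference that was
 * taken for the context switch once the timestamp retires (or the event is
 * cancelled).
 */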
static void _drawctxt_switch_wait_callback(struct kgsl_device *device,
struct kgsl_event_group *group,
void *priv, int result)
{
struct adreno_context *drawctxt = (struct adreno_context *) priv;
kgsl_context_put(&drawctxt->base);
}
void adreno_put_drawctxt_on_timestamp(struct kgsl_device *device,
struct adreno_context *drawctxt,
struct adreno_ringbuffer *rb, u32 timestamp)
{
if (!drawctxt)
return;
if (kgsl_add_event(device, &rb->events, timestamp,
_drawctxt_switch_wait_callback, drawctxt))
kgsl_context_put(&drawctxt->base);
}
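/*
 * Move a context to the front of the active list and refresh its last-active
 * time. The caller must hold adreno_dev->active_list_lock.
 */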
static void _add_context(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
{
/* Remove it from the list */
list_del_init(&drawctxt->active_node);
/* And push it to the front */
drawctxt->active_time = jiffies;
list_add(&drawctxt->active_node, &adreno_dev->active_list);
}
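/*
 * Active-list predicates: a context counts as active if it has been seen
 * within the last 100ms. __count_drawqueue_context() additionally restricts
 * the count to contexts that submit to the given dispatch drawqueue.
 */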
static int __count_context(struct adreno_context *drawctxt, void *data)
{
unsigned long expires = drawctxt->active_time + msecs_to_jiffies(100);
return time_after(jiffies, expires) ? 0 : 1;
}
static int __count_drawqueue_context(struct adreno_context *drawctxt,
void *data)
{
unsigned long expires = drawctxt->active_time + msecs_to_jiffies(100);
if (time_after(jiffies, expires))
return 0;
return (&drawctxt->rb->dispatch_q ==
(struct adreno_dispatcher_drawqueue *) data) ? 1 : 0;
}
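/*
 * Walk the active list (most recently used first) and count entries until the
 * predicate reports an inactive context, at which point the walk can stop.
 */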
static int _adreno_count_active_contexts(struct adreno_device *adreno_dev,
int (*func)(struct adreno_context *, void *), void *data)
{
struct adreno_context *ctxt;
int count = 0;
list_for_each_entry(ctxt, &adreno_dev->active_list, active_node) {
if (func(ctxt, data) == 0)
return count;
count++;
}
return count;
}
void adreno_track_context(struct adreno_device *adreno_dev,
struct adreno_dispatcher_drawqueue *drawqueue,
struct adreno_context *drawctxt)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
spin_lock(&adreno_dev->active_list_lock);
_add_context(adreno_dev, drawctxt);
device->active_context_count =
_adreno_count_active_contexts(adreno_dev,
__count_context, NULL);
if (drawqueue)
drawqueue->active_context_count =
_adreno_count_active_contexts(adreno_dev,
__count_drawqueue_context, drawqueue);
spin_unlock(&adreno_dev->active_list_lock);
}

View File

@ -0,0 +1,206 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_DRAWCTXT_H
#define __ADRENO_DRAWCTXT_H
#include <linux/types.h>
#include "kgsl_device.h"
struct adreno_context_type {
unsigned int type;
const char *str;
};
#define ADRENO_CONTEXT_DRAWQUEUE_SIZE 128
#define SUBMIT_RETIRE_TICKS_SIZE 7
struct kgsl_device;
struct adreno_device;
struct kgsl_device_private;
/**
* struct adreno_context - Adreno GPU draw context
* @timestamp: Last issued context-specific timestamp
* @internal_timestamp: Global timestamp of the last issued command
* NOTE: guarded by device->mutex, not drawctxt->lock!
* @type: Context type (GL, CL, RS)
* @lock: Spinlock to protect the drawqueue
* @drawqueue: Queue of drawobjs waiting to be dispatched for this
* context
* @drawqueue_head: Head of the drawqueue
* @drawqueue_tail: Tail of the drawqueue
* @wq: Wait queue for threads sleeping until there is room in the drawqueue
* @waiting: Wait queue for threads waiting for a timestamp or event
* @timeout: Wait queue for threads waiting for the context to be invalidated
* @queued: Number of commands queued in the drawqueue
* @fault_policy: GFT fault policy set in _skip_cmd();
* @debug_root: debugfs entry for this context.
* @queued_timestamp: The last timestamp that was queued on this context
* @rb: The ringbuffer in which this context submits commands.
* @submitted_timestamp: The last timestamp that was submitted for this context
* @submit_retire_ticks: Array to hold command obj execution times from submit
* to retire
* @ticks_index: The index into submit_retire_ticks[] where the new delta will
* be written.
* @active_node: Linkage for nodes in active_list
* @active_time: Time when this context was last seen
*/
struct adreno_context {
struct kgsl_context base;
unsigned int timestamp;
unsigned int internal_timestamp;
unsigned int type;
spinlock_t lock;
/* Dispatcher */
struct kgsl_drawobj *drawqueue[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
unsigned int drawqueue_head;
unsigned int drawqueue_tail;
wait_queue_head_t wq;
wait_queue_head_t waiting;
wait_queue_head_t timeout;
int queued;
unsigned int fault_policy;
struct dentry *debug_root;
unsigned int queued_timestamp;
struct adreno_ringbuffer *rb;
unsigned int submitted_timestamp;
uint64_t submit_retire_ticks[SUBMIT_RETIRE_TICKS_SIZE];
int ticks_index;
struct list_head active_node;
unsigned long active_time;
/** @gmu_context_queue: Queue to dispatch submissions to GMU */
struct kgsl_memdesc gmu_context_queue;
/** @gmu_hw_fence_queue: Queue for GMU to store hardware fences for this context */
struct kgsl_memdesc gmu_hw_fence_queue;
/** @hw_fence_list: List of hardware fences (sorted by timestamp) not yet submitted to GMU */
struct list_head hw_fence_list;
/** @hw_fence_inflight_list: List of hardware fences submitted to GMU */
struct list_head hw_fence_inflight_list;
/** @hw_fence_count: Number of hardware fences not yet sent to Tx Queue */
u32 hw_fence_count;
/** @syncobj_timestamp: Timestamp to check whether GMU has consumed a syncobj */
u32 syncobj_timestamp;
};
/* Flag definitions for flag field in adreno_context */
/**
* enum adreno_context_priv - Private flags for an adreno draw context
* @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered)
* @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang
* @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang
* and fault tolerance was successful
* @ADRENO_CONTEXT_SKIP_EOF - Skip the context's IBs until the next end of frame
* marker.
* @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission.
* @ADRENO_CONTEXT_SKIP_CMD - Context's drawobjs skipped during
* fault tolerance.
* @ADRENO_CONTEXT_FENCE_LOG - Dump fences on this context.
*/
enum adreno_context_priv {
ADRENO_CONTEXT_FAULT = KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC,
ADRENO_CONTEXT_GPU_HANG,
ADRENO_CONTEXT_GPU_HANG_FT,
ADRENO_CONTEXT_SKIP_EOF,
ADRENO_CONTEXT_FORCE_PREAMBLE,
ADRENO_CONTEXT_SKIP_CMD,
ADRENO_CONTEXT_FENCE_LOG,
};
struct kgsl_context *adreno_drawctxt_create(
struct kgsl_device_private *dev_priv,
uint32_t *flags);
void adreno_drawctxt_detach(struct kgsl_context *context);
void adreno_drawctxt_destroy(struct kgsl_context *context);
struct adreno_ringbuffer;
struct adreno_dispatcher_drawqueue;
int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
struct kgsl_context *context,
uint32_t timestamp, unsigned int timeout);
void adreno_drawctxt_invalidate(struct kgsl_device *device,
struct kgsl_context *context);
void adreno_drawctxt_dump(struct kgsl_device *device,
struct kgsl_context *context);
/**
* adreno_drawctxt_detached - Helper function to check if a context is detached
* @drawctxt: Adreno drawctxt to check
*
* Return: True if the context isn't null and it has been detached
*/
static inline bool adreno_drawctxt_detached(struct adreno_context *drawctxt)
{
return (drawctxt && kgsl_context_detached(&drawctxt->base));
}
/**
* adreno_put_drawctxt_on_timestamp - Put the refcount on the drawctxt when the
* timestamp expires
* @device: A KGSL device handle
* @drawctxt: The draw context to put away
* @rb: The ringbuffer that will trigger the timestamp event
* @timestamp: The timestamp on @rb that will trigger the event
*
* Add an event to put the refcount on @drawctxt after @timestamp expires on
* @rb. This is used by the context switch to safely put away the context after
* a new context is switched in.
*/
void adreno_put_drawctxt_on_timestamp(struct kgsl_device *device,
struct adreno_context *drawctxt,
struct adreno_ringbuffer *rb, u32 timestamp);
/**
* adreno_drawctxt_get_pagetable - Helper function to return the pagetable for a
* context
* @drawctxt: The adreno draw context to query
*
* Return: A pointer to the pagetable for the process that owns the context or
* NULL
*/
static inline struct kgsl_pagetable *
adreno_drawctxt_get_pagetable(struct adreno_context *drawctxt)
{
if (drawctxt)
return drawctxt->base.proc_priv->pagetable;
return NULL;
}
/**
* adreno_drawctxt_set_guilty - Mark a context as guilty and invalidate it
* @device: Pointer to a GPU device handle
* @context: Pointer to the context to invalidate
*
* Mark the specified context as guilty and invalidate it
*/
void adreno_drawctxt_set_guilty(struct kgsl_device *device,
struct kgsl_context *context);
/**
* adreno_track_context - Add a context to active list and keep track of active contexts
* @adreno_dev: Pointer to adreno device
* @drawqueue: Pointer to the dispatch queue to which the context sends commands
* @drawctxt: Draw context which is to be tracked
*
* Add the given draw context to the active list and update number of contexts which
* are active overall as well as which are active on the dispatch queue to which
* the given context sends commands.
*/
void adreno_track_context(struct adreno_device *adreno_dev,
struct adreno_dispatcher_drawqueue *drawqueue,
struct adreno_context *drawctxt);
#endif /* __ADRENO_DRAWCTXT_H */

File diff suppressed because it is too large

View File

@ -0,0 +1,519 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_GEN7_H_
#define _ADRENO_GEN7_H_
#include <linux/delay.h>
#include "gen7_reg.h"
#include "adreno_gen7_gmu.h"
/* Forward struct declaration */
struct gen7_snapshot_block_list;
extern const struct adreno_power_ops gen7_gmu_power_ops;
extern const struct adreno_power_ops gen7_hwsched_power_ops;
extern const struct adreno_perfcounters adreno_gen7_perfcounters;
extern const struct adreno_perfcounters adreno_gen7_hwsched_perfcounters;
extern const struct adreno_perfcounters adreno_gen7_9_0_hwsched_perfcounters;
struct gen7_gpudev {
struct adreno_gpudev base;
int (*hfi_probe)(struct adreno_device *adreno_dev);
void (*hfi_remove)(struct adreno_device *adreno_dev);
void (*handle_watchdog)(struct adreno_device *adreno_dev);
};
extern const struct gen7_gpudev adreno_gen7_gmu_gpudev;
extern const struct gen7_gpudev adreno_gen7_hwsched_gpudev;
extern const struct gen7_gpudev adreno_gen7_9_0_hwsched_gpudev;
/**
* struct gen7_device - Container for the gen7_device
*/
struct gen7_device {
/** @gmu: Container for the gen7 GMU device */
struct gen7_gmu_device gmu;
/** @adreno_dev: Container for the generic adreno device */
struct adreno_device adreno_dev;
};
/**
* struct gen7_protected_regs - container for a protect register span
*/
struct gen7_protected_regs {
/** @reg: Physical protected mode register to write to */
u32 reg;
/** @start: Dword offset of the starting register in the range */
u32 start;
/**
* @end: Dword offset of the ending register in the range
* (inclusive)
*/
u32 end;
/**
* @noaccess: 1 if the register should not be accessible from
* userspace, 0 if it can be read (but not written)
*/
u32 noaccess;
};
/**
* struct adreno_gen7_core - gen7 specific GPU core definitions
*/
struct adreno_gen7_core {
/** @base: Container for the generic GPU definitions */
struct adreno_gpu_core base;
/** @gmu_fw_version: Minimum firmware version required to support this core */
u32 gmu_fw_version;
/** @sqefw_name: Name of the SQE microcode file */
const char *sqefw_name;
/** @aqefw_name: Name of the AQE microcode file */
const char *aqefw_name;
/** @gmufw_name: Name of the GMU firmware file */
const char *gmufw_name;
/** @gmufw_bak_name: Name of the backup GMU firmware file */
const char *gmufw_bak_name;
/** @zap_name: Name of the CPZ zap file */
const char *zap_name;
/** @hwcg: List of registers and values to write for HWCG */
const struct kgsl_regmap_list *hwcg;
/** @hwcg_count: Number of registers in @hwcg */
u32 hwcg_count;
/** @ao_hwcg: List of registers and values to write for HWCG in AO block */
const struct kgsl_regmap_list *ao_hwcg;
/** @ao_hwcg_count: Number of registers in @ao_hwcg */
u32 ao_hwcg_count;
/** @gbif: List of registers and values to write for GBIF */
const struct kgsl_regmap_list *gbif;
/** @gbif_count: Number of registers in @gbif */
u32 gbif_count;
/** @hang_detect_cycles: Hang detect counter timeout value */
u32 hang_detect_cycles;
/** @protected_regs: Array of protected registers for the target */
const struct gen7_protected_regs *protected_regs;
/** @ctxt_record_size: Size of the preemption record in bytes */
u64 ctxt_record_size;
/** @highest_bank_bit: Highest bank bit value */
u32 highest_bank_bit;
/** @gen7_snapshot_block_list: Device-specific blocks dumped in the snapshot */
const struct gen7_snapshot_block_list *gen7_snapshot_block_list;
/** @gmu_hub_clk_freq: Gmu hub interface clock frequency */
u64 gmu_hub_clk_freq;
/**
 * @bcl_data: Bit 0 contains the response type for BCL alarms and bits 1:21
 * control the SID values used to configure throttle levels for BCL alarm
 * levels 0-2. If the SID values are not set, GMU firmware uses its default
 * throttle levels.
 */
u32 bcl_data;
/** @preempt_level: Preemption level valid ranges [0 to 2] */
u32 preempt_level;
/** @qos_value: GPU qos value to set for each RB. */
const u32 *qos_value;
/**
* @acv_perfmode_ddr_freq: Vote perfmode when DDR frequency >= acv_perfmode_ddr_freq.
* If not specified, vote perfmode for highest DDR level only.
*/
u32 acv_perfmode_ddr_freq;
/** @acv_perfmode_vote: ACV vote for GPU perfmode */
u32 acv_perfmode_vote;
/** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */
const u32 rt_bus_hint;
/** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */
bool fast_bus_hint;
/** @noc_timeout_us: GPU config NOC port timeout in usec */
u32 noc_timeout_us;
};
/**
* struct gen7_cp_preemption_record - CP context record for
* preemption.
* @magic: (00) Value at this offset must be equal to
* GEN7_CP_CTXRECORD_MAGIC_REF.
* @info: (04) Type of record. Written non-zero (usually) by CP.
* We must set this to zero for all ringbuffers.
* @errno: (08) Error code. Initialize this to GEN7_CP_CTXRECORD_ERROR_NONE.
* CP will update to another value if a preemption error occurs.
* @data: (12) DATA field in YIELD and SET_MARKER packets.
* Written by CP when switching out. Not used on switch-in. Initialized to 0.
* @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
* @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
* @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
* @_pad28: (28) Reserved/padding.
* @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
* @rbase: (40) RB_BASE_LO|HI, saved and restored.
* @counter: (48) Pointer to the preemption counter.
* @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI saved and restored. We must initialize this.
*/
struct gen7_cp_preemption_record {
u32 magic;
u32 info;
u32 errno;
u32 data;
u32 cntl;
u32 rptr;
u32 wptr;
u32 _pad28;
u64 rptr_addr;
u64 rbase;
u64 counter;
u64 bv_rptr_addr;
};
/**
* struct gen7_cp_smmu_info - CP preemption SMMU info.
* @magic: (00) The value at this offset must be equal to
* GEN7_CP_SMMU_INFO_MAGIC_REF
* @_pad4: (04) Reserved/padding
* @ttbr0: (08) Base address of the page table for the incoming context
* @asid: (16) Address Space IDentifier (ASID) of the incoming context
* @context_idr: (20) Context Identification Register value
* @context_bank: (24) Which Context Bank in SMMU to update
*/
struct gen7_cp_smmu_info {
u32 magic;
u32 _pad4;
u64 ttbr0;
u32 asid;
u32 context_idr;
u32 context_bank;
};
#define GEN7_CP_SMMU_INFO_MAGIC_REF 0x241350d5UL
#define GEN7_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL
/* Size of each CP preemption record */
#define GEN7_CP_CTXRECORD_SIZE_IN_BYTES (4192 * 1024)
/* Size of the user context record block (in bytes) */
#define GEN7_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024)
/* Size of the performance counter save/restore block (in bytes) */
#define GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024)
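/*
 * Default RB_CNTL value: per the FIELD_PREP encoding below, bits [7:0] carry
 * ilog2(KGSL_RB_DWORDS >> 1) (ringbuffer size) and bits [12:8] carry ilog2(4)
 * (block size).
 */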
#define GEN7_CP_RB_CNTL_DEFAULT \
(FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \
FIELD_PREP(GENMASK(12, 8), ilog2(4)))
/* Size of the CP_INIT pm4 stream in dwords */
#define GEN7_CP_INIT_DWORDS 10
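/*
 * Interrupt bits enabled by default: GEN7_INT_MASK is used on the standard
 * dispatch path, while GEN7_HWSCHED_INT_MASK is the reduced set used with
 * hardware scheduling (hwsched).
 */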
#define GEN7_INT_MASK \
((1 << GEN7_INT_AHBERROR) | \
(1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \
(1 << GEN7_INT_GPCERROR) | \
(1 << GEN7_INT_SWINTERRUPT) | \
(1 << GEN7_INT_HWERROR) | \
(1 << GEN7_INT_PM4CPINTERRUPT) | \
(1 << GEN7_INT_RB_DONE_TS) | \
(1 << GEN7_INT_CACHE_CLEAN_TS) | \
(1 << GEN7_INT_ATBBUSOVERFLOW) | \
(1 << GEN7_INT_HANGDETECTINTERRUPT) | \
(1 << GEN7_INT_OUTOFBOUNDACCESS) | \
(1 << GEN7_INT_UCHETRAPINTERRUPT) | \
(1 << GEN7_INT_TSBWRITEERROR) | \
(1 << GEN7_INT_SWFUSEVIOLATION))
#define GEN7_HWSCHED_INT_MASK \
((1 << GEN7_INT_AHBERROR) | \
(1 << GEN7_INT_ATBASYNCFIFOOVERFLOW) | \
(1 << GEN7_INT_ATBBUSOVERFLOW) | \
(1 << GEN7_INT_OUTOFBOUNDACCESS) | \
(1 << GEN7_INT_UCHETRAPINTERRUPT))
/**
* to_gen7_core - return the gen7 specific GPU core struct
* @adreno_dev: An Adreno GPU device handle
*
* Returns:
* A pointer to the gen7 specific GPU core struct
*/
static inline const struct adreno_gen7_core *
to_gen7_core(struct adreno_device *adreno_dev)
{
const struct adreno_gpu_core *core = adreno_dev->gpucore;
return container_of(core, struct adreno_gen7_core, base);
}
/* Preemption functions */
void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic);
void gen7_preemption_schedule(struct adreno_device *adreno_dev);
void gen7_preemption_start(struct adreno_device *adreno_dev);
int gen7_preemption_init(struct adreno_device *adreno_dev);
u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
unsigned int *cmds);
u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds);
unsigned int gen7_set_marker(unsigned int *cmds,
enum adreno_cp_marker_type type);
void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit);
int gen7_preemption_context_init(struct kgsl_context *context);
void gen7_preemption_context_destroy(struct kgsl_context *context);
void gen7_preemption_prepare_postamble(struct adreno_device *adreno_dev);
void gen7_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
void gen7_crashdump_init(struct adreno_device *adreno_dev);
/**
* gen7_snapshot_external_core_regs - Dump external registers into snapshot
* @device: Pointer to KGSL device
* @snapshot: Pointer to the snapshot
*
* Dump external core registers like GPUCC, CPR into GPU snapshot.
*/
void gen7_snapshot_external_core_regs(struct kgsl_device *device,
struct kgsl_snapshot *snapshot);
/**
* gen7_start - Program gen7 registers
* @adreno_dev: An Adreno GPU handle
*
* This function does all gen7 register programming every
* time we boot the gpu
*
* Return: 0 on success or negative on failure
*/
int gen7_start(struct adreno_device *adreno_dev);
/**
* gen7_init - Initialize gen7 resources
* @adreno_dev: An Adreno GPU handle
*
* This function does gen7 specific one time initialization
* and is invoked when the very first client opens a
* kgsl instance
*
* Return: Zero on success and negative error on failure
*/
int gen7_init(struct adreno_device *adreno_dev);
/**
* gen7_cx_timer_init - Initialize the CX timer on Gen7 devices
* @adreno_dev: Pointer to the adreno device
*
* Synchronize the GPU CX timer (if we have one) with the CPU timer
*/
void gen7_cx_timer_init(struct adreno_device *adreno_dev);
/**
* gen7_get_gpu_feature_info - Get hardware supported feature info
* @adreno_dev: Pointer to the adreno device
*
* Get HW supported feature info and update software feature configuration
*/
void gen7_get_gpu_feature_info(struct adreno_device *adreno_dev);
/**
* gen7_rb_start - Gen7 specific ringbuffer setup
* @adreno_dev: An Adreno GPU handle
*
* This function does gen7 specific ringbuffer setup and
* attempts to submit CP INIT and bring GPU out of secure mode
*
* Return: Zero on success and negative error on failure
*/
int gen7_rb_start(struct adreno_device *adreno_dev);
/**
* gen7_microcode_read - Get the cp microcode from the filesystem
* @adreno_dev: An Adreno GPU handle
*
* This function gets the firmware from the filesystem and sets up
* the microcode global buffer
*
* Return: Zero on success and negative error on failure
*/
int gen7_microcode_read(struct adreno_device *adreno_dev);
/**
* gen7_probe_common - Probe common gen7 resources
* @pdev: Pointer to the platform device
* @adreno_dev: Pointer to the adreno device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore structure
*
* This function sets up the gen7 resources common across all
* gen7 targets
*/
int gen7_probe_common(struct platform_device *pdev,
struct adreno_device *adreno_dev, u32 chipid,
const struct adreno_gpu_core *gpucore);
/**
* gen7_hw_isidle - Check whether gen7 gpu is idle or not
* @adreno_dev: An Adreno GPU handle
*
* Return: True if gpu is idle, otherwise false
*/
bool gen7_hw_isidle(struct adreno_device *adreno_dev);
/**
* gen7_spin_idle_debug - Debug logging used when gpu fails to idle
* @adreno_dev: An Adreno GPU handle
* @str: String describing the failure
*
* This function logs interesting registers and triggers a snapshot
*/
void gen7_spin_idle_debug(struct adreno_device *adreno_dev,
const char *str);
/**
* gen7_perfcounter_update - Update the IFPC perfcounter list
* @adreno_dev: An Adreno GPU handle
* @reg: Perfcounter reg struct to add/remove to the list
* @update_reg: true if the perfcounter needs to be programmed by the CPU
* @pipe: pipe id for CP aperture control
* @flags: Flags set for requested perfcounter group
*
* Return: 0 on success or -EBUSY if the lock couldn't be taken
*/
int gen7_perfcounter_update(struct adreno_device *adreno_dev,
struct adreno_perfcount_register *reg, bool update_reg, u32 pipe,
unsigned long flags);
/*
* gen7_ringbuffer_init - Initialize the ringbuffers
* @adreno_dev: An Adreno GPU handle
*
* Initialize the ringbuffer(s) for gen7.
* Return: 0 on success or negative on failure
*/
int gen7_ringbuffer_init(struct adreno_device *adreno_dev);
/**
* gen7_ringbuffer_submitcmd - Submit a user command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @cmdobj: Pointer to a user command object
* @flags: Internal submit flags
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time);
/**
* gen7_ringbuffer_submit - Submit a command to the ringbuffer
* @rb: Ringbuffer pointer
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time);
/**
* gen7_fenced_write - Write to a fenced register
* @adreno_dev: An Adreno GPU handle
* @offset: Register offset
* @value: Value to write
* @mask: Expected FENCE_STATUS for successful write
*
* Return: 0 on success or negative on failure
*/
int gen7_fenced_write(struct adreno_device *adreno_dev, u32 offset,
u32 value, u32 mask);
/**
* gen7_ringbuffer_addcmds - Wrap and submit commands to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @rb: Ringbuffer pointer
* @drawctxt: Draw context submitting the commands
* @flags: Submission flags
* @in: Input buffer to write to ringbuffer
* @dwords: Dword length of @in
* @timestamp: Draw context timestamp for the submission
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time);
/**
* gen7_cp_init_cmds - Create the CP_INIT commands
* @adreno_dev: An Adreno GPU handle
* @cmds: Buffer to write the CP_INIT commands into
*/
void gen7_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds);
/**
* gen7_gmu_hfi_probe - Probe Gen7 HFI specific data
* @adreno_dev: An Adreno GPU handle
*
* Return: 0 on success or negative on failure
*/
int gen7_gmu_hfi_probe(struct adreno_device *adreno_dev);
static inline const struct gen7_gpudev *
to_gen7_gpudev(const struct adreno_gpudev *gpudev)
{
return container_of(gpudev, struct gen7_gpudev, base);
}
/**
* gen7_reset_preempt_records - Reset the preemption buffers
* @adreno_dev: Handle to the adreno device
*
* Reset the preemption records at the time of hard reset
*/
void gen7_reset_preempt_records(struct adreno_device *adreno_dev);
/**
* gen7_enable_ahb_timeout_detection - Program AHB control registers
* @adreno_dev: An Adreno GPU handle
*
* Program AHB control registers to enable AHB timeout detection.
*/
void gen7_enable_ahb_timeout_detection(struct adreno_device *adreno_dev);
/**
* gen7_rdpm_mx_freq_update - Update the mx frequency
* @gmu: An Adreno GMU handle
* @freq: Frequency in KHz
*
* This function communicates GPU mx frequency changes to rdpm.
*/
void gen7_rdpm_mx_freq_update(struct gen7_gmu_device *gmu, u32 freq);
/**
* gen7_rdpm_cx_freq_update - Update the cx frequency
* @gmu: An Adreno GMU handle
* @freq: Frequency in KHz
*
* This function communicates GPU cx frequency changes to rdpm.
*/
void gen7_rdpm_cx_freq_update(struct gen7_gmu_device *gmu, u32 freq);
/**
* gen7_scm_gpu_init_cx_regs - Program gpu regs for feature support
* @adreno_dev: Handle to the adreno device
*
* Program GPU registers for feature support. The SCM call for this
* is available from kernel version 6.0 onwards.
*
* Return: 0 on success or negative on failure
*/
int gen7_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev);
#ifdef CONFIG_QCOM_KGSL_CORESIGHT
void gen7_coresight_init(struct adreno_device *device);
#else
static inline void gen7_coresight_init(struct adreno_device *device) { }
#endif
#endif

View File

@ -0,0 +1,927 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN7_0_0_SNAPSHOT_H
#define __ADRENO_GEN7_0_0_SNAPSHOT_H
#include "adreno_gen7_snapshot.h"
static const u32 gen7_0_0_debugbus_blocks[] = {
DEBUGBUS_CP_0_0,
DEBUGBUS_CP_0_1,
DEBUGBUS_RBBM,
DEBUGBUS_HLSQ,
DEBUGBUS_UCHE_0,
DEBUGBUS_TESS_BR,
DEBUGBUS_TESS_BV,
DEBUGBUS_PC_BR,
DEBUGBUS_PC_BV,
DEBUGBUS_VFDP_BR,
DEBUGBUS_VFDP_BV,
DEBUGBUS_VPC_BR,
DEBUGBUS_VPC_BV,
DEBUGBUS_TSE_BR,
DEBUGBUS_TSE_BV,
DEBUGBUS_RAS_BR,
DEBUGBUS_RAS_BV,
DEBUGBUS_VSC,
DEBUGBUS_COM_0,
DEBUGBUS_LRZ_BR,
DEBUGBUS_LRZ_BV,
DEBUGBUS_UFC_0,
DEBUGBUS_UFC_1,
DEBUGBUS_GMU_GX,
DEBUGBUS_DBGC,
DEBUGBUS_GPC_BR,
DEBUGBUS_GPC_BV,
DEBUGBUS_LARC,
DEBUGBUS_HLSQ_SPTP,
DEBUGBUS_RB_0,
DEBUGBUS_RB_1,
DEBUGBUS_RB_2,
DEBUGBUS_RB_3,
DEBUGBUS_UCHE_WRAPPER,
DEBUGBUS_CCU_0,
DEBUGBUS_CCU_1,
DEBUGBUS_CCU_2,
DEBUGBUS_CCU_3,
DEBUGBUS_VFD_BR_0,
DEBUGBUS_VFD_BR_1,
DEBUGBUS_VFD_BR_2,
DEBUGBUS_VFD_BR_3,
DEBUGBUS_VFD_BR_4,
DEBUGBUS_VFD_BR_5,
DEBUGBUS_VFD_BR_6,
DEBUGBUS_VFD_BR_7,
DEBUGBUS_VFD_BV_0,
DEBUGBUS_VFD_BV_1,
DEBUGBUS_VFD_BV_2,
DEBUGBUS_VFD_BV_3,
DEBUGBUS_USP_0,
DEBUGBUS_USP_1,
DEBUGBUS_USP_2,
DEBUGBUS_USP_3,
DEBUGBUS_TP_0,
DEBUGBUS_TP_1,
DEBUGBUS_TP_2,
DEBUGBUS_TP_3,
DEBUGBUS_TP_4,
DEBUGBUS_TP_5,
DEBUGBUS_TP_6,
DEBUGBUS_TP_7,
DEBUGBUS_USPTP_0,
DEBUGBUS_USPTP_1,
DEBUGBUS_USPTP_2,
DEBUGBUS_USPTP_3,
DEBUGBUS_USPTP_4,
DEBUGBUS_USPTP_5,
DEBUGBUS_USPTP_6,
DEBUGBUS_USPTP_7,
};
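/*
 * Shader memory blocks dumped in the snapshot; the field layout of each entry
 * is defined by struct gen7_shader_block (see adreno_gen7_snapshot.h).
 */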
static struct gen7_shader_block gen7_0_0_shader_blocks[] = {
{TP0_TMO_DATA, 0x200, 4, 2, PIPE_BR, USPTP},
{TP0_SMO_DATA, 0x80, 4, 2, PIPE_BR, USPTP},
{TP0_MIPMAP_BASE_DATA, 0x3c0, 4, 2, PIPE_BR, USPTP},
{SP_INST_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_INST_DATA_1, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_0_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_1_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_2_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_3_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_4_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_5_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_6_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_7_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_CB_RAM, 0x390, 4, 2, PIPE_BR, USPTP,},
{SP_INST_TAG, 0x90, 4, 2, PIPE_BR, USPTP},
{SP_INST_DATA_2, 0x200, 4, 2, PIPE_BR, USPTP},
{SP_TMO_TAG, 0x80, 4, 2, PIPE_BR, USPTP},
{SP_SMO_TAG, 0x80, 4, 2, PIPE_BR, USPTP},
{SP_STATE_DATA, 0x40, 4, 2, PIPE_BR, USPTP},
{SP_HWAVE_RAM, 0x100, 4, 2, PIPE_BR, USPTP},
{SP_L0_INST_BUF, 0x50, 4, 2, PIPE_BR, USPTP},
{SP_LB_8_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_9_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_10_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_11_DATA, 0x800, 4, 2, PIPE_BR, USPTP},
{SP_LB_12_DATA, 0x200, 4, 2, PIPE_BR, USPTP},
{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM, 0x1c0, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM, 0x1c0, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM, 0x300, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM, 0x280, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM, 0x280, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM, 0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM_1, 0x200, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_INST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_INST_RAM, 0x800, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_INST_RAM, 0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM, 0x800, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM, 0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_INST_RAM_1, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_STPROC_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BV_BE_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BV_BE_META, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_DATAPATH_META, 0x20, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_FRONTEND_META, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_FRONTEND_META, 0x40, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_FRONTEND_META, 0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_INDIRECT_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
};
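/*
 * The register lists below are flattened pairs of inclusive {first, last}
 * dword offsets, terminated by a UINT_MAX, UINT_MAX sentinel. The
 * static_asserts ensure each list contains an even number of u32 entries.
 */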
static const u32 gen7_0_0_pre_crashdumper_gpu_registers[] = {
0x00210, 0x00210, 0x00212, 0x00213, 0x03c00, 0x03c0b, 0x03c40, 0x03c42,
0x03c45, 0x03c47, 0x03c49, 0x03c4a, 0x03cc0, 0x03cd1,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_pre_crashdumper_gpu_registers), 8));
static const u32 gen7_0_0_post_crashdumper_registers[] = {
0x00535, 0x00535, 0x0f400, 0x0f400, 0x0f800, 0x0f803, 0x0fc00, 0x0fc01,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_post_crashdumper_registers), 8));
static const u32 gen7_0_0_gpu_registers[] = {
0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044,
0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050,
0x00056, 0x00056, 0x00073, 0x00075, 0x000ad, 0x000ae, 0x000b0, 0x000b0,
0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0,
0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0,
0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0,
0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0,
0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010b,
0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211,
0x00215, 0x00243, 0x00260, 0x00268, 0x00272, 0x00274, 0x00281, 0x0028d,
0x00300, 0x00401, 0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1,
0x00500, 0x00500, 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511,
0x00533, 0x00534, 0x00536, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567,
0x00574, 0x00577, 0x005fb, 0x005ff, 0x00800, 0x00808, 0x00810, 0x00813,
0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, 0x0083f, 0x00841,
0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0,
0x008c4, 0x008c5, 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3,
0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d,
0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9,
0x009ce, 0x009d7, 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03,
0x00a10, 0x00a4f, 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31,
0x00b35, 0x00b3c, 0x00b40, 0x00b40, 0x00c00, 0x00c00, 0x00c02, 0x00c04,
0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4,
0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e13, 0x00e17, 0x00e19,
0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gpu_registers), 8));
static const u32 gen7_0_0_gmu_registers[] = {
0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
0x1f400, 0x1f40d, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
0x1f509, 0x1f50b, 0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c,
0x1f80f, 0x1f80f, 0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c,
0x1f824, 0x1f82a, 0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860,
0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889, 0x1f8a0, 0x1f8a2,
0x1f8a4, 0x1f8af, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0,
0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f914, 0x1f920, 0x1f921,
0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940,
0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f94f, 0x1f951, 0x1f958, 0x1f95a,
0x1f95d, 0x1f95d, 0x1f962, 0x1f962, 0x1f964, 0x1f96b, 0x1f970, 0x1f979,
0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993, 0x1f996, 0x1f99e,
0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1, 0x1f9f8, 0x1f9fa,
0x1fa00, 0x1fa03, 0x20000, 0x20005, 0x20008, 0x2000c, 0x20010, 0x20012,
0x20018, 0x20018, 0x20020, 0x20023, 0x20030, 0x20031, 0x23801, 0x23801,
0x23803, 0x23803, 0x23805, 0x23805, 0x23807, 0x23807, 0x23809, 0x23809,
0x2380b, 0x2380b, 0x2380d, 0x2380d, 0x2380f, 0x2380f, 0x23811, 0x23811,
0x23813, 0x23813, 0x23815, 0x23815, 0x23817, 0x23817, 0x23819, 0x23819,
0x2381b, 0x2381b, 0x2381d, 0x2381d, 0x2381f, 0x23820, 0x23822, 0x23822,
0x23824, 0x23824, 0x23826, 0x23826, 0x23828, 0x23828, 0x2382a, 0x2382a,
0x2382c, 0x2382c, 0x2382e, 0x2382e, 0x23830, 0x23830, 0x23832, 0x23832,
0x23834, 0x23834, 0x23836, 0x23836, 0x23838, 0x23838, 0x2383a, 0x2383a,
0x2383c, 0x2383c, 0x2383e, 0x2383e, 0x23840, 0x23847, 0x23b00, 0x23b01,
0x23b03, 0x23b03, 0x23b05, 0x23b0e, 0x23b10, 0x23b13, 0x23b15, 0x23b16,
0x23b20, 0x23b20, 0x23b28, 0x23b28, 0x23b30, 0x23b30,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gmu_registers), 8));
static const u32 gen7_0_0_gmugx_registers[] = {
0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df,
0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a5c0, 0x1a5df,
0x1a780, 0x1a781, 0x1a783, 0x1a785, 0x1a787, 0x1a789, 0x1a78b, 0x1a78d,
0x1a78f, 0x1a791, 0x1a793, 0x1a795, 0x1a797, 0x1a799, 0x1a79b, 0x1a79b,
0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5, 0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd,
0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5, 0x1a7d8, 0x1a7d9, 0x1a7fc, 0x1a7fd,
0x1a800, 0x1a802, 0x1a804, 0x1a804, 0x1a816, 0x1a816, 0x1a81e, 0x1a81e,
0x1a826, 0x1a826, 0x1a82e, 0x1a82e, 0x1a836, 0x1a836, 0x1a83e, 0x1a83e,
0x1a846, 0x1a846, 0x1a860, 0x1a862, 0x1a864, 0x1a867, 0x1a870, 0x1a870,
0x1a883, 0x1a884, 0x1a8c0, 0x1a8c2, 0x1a8c4, 0x1a8c7, 0x1a8d0, 0x1a8d3,
0x1a900, 0x1a92b, 0x1a940, 0x1a940,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gmugx_registers), 8));
static const u32 gen7_0_0_noncontext_pipe_br_registers[] = {
0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
0x0a630, 0x0a631, 0x0a638, 0x0a638,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_br_registers), 8));
static const u32 gen7_0_0_noncontext_pipe_bv_registers[] = {
0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
0x0a630, 0x0a631, 0x0a638, 0x0a638,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_bv_registers), 8));
static const u32 gen7_0_0_noncontext_pipe_lpac_registers[] = {
0x00887, 0x0088c, 0x00f80, 0x00f80,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_pipe_lpac_registers), 8));
static const u32 gen7_0_0_noncontext_rb_rac_pipe_br_registers[] = {
0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rac_pipe_br_registers), 8));
static const u32 gen7_0_0_noncontext_rb_rbp_pipe_br_registers[] = {
0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e3f, 0x08e50, 0x08e50,
0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e65,
0x08e68, 0x08e68, 0x08e70, 0x08e79, 0x08e80, 0x08e8f,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_noncontext_rb_rbp_pipe_br_registers), 8));
/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BR */
static const u32 gen7_0_0_gras_cluster_gras_pipe_br_registers[] = {
0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f,
0x08400, 0x08406, 0x0840a, 0x0840b,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_br_registers), 8));
/* Block: GRAS Cluster: CLUSTER_GRAS Pipeline: PIPE_BV */
static const u32 gen7_0_0_gras_cluster_gras_pipe_bv_registers[] = {
0x08000, 0x08008, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08110, 0x08120, 0x0813f,
0x08400, 0x08406, 0x0840a, 0x0840b,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gras_cluster_gras_pipe_bv_registers), 8));
/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BR */
static const u32 gen7_0_0_pc_cluster_fe_pipe_br_registers[] = {
0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
0x09b00, 0x09b08,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_br_registers), 8));
/* Block: PC Cluster: CLUSTER_FE Pipeline: PIPE_BV */
static const u32 gen7_0_0_pc_cluster_fe_pipe_bv_registers[] = {
0x09800, 0x09804, 0x09806, 0x0980a, 0x09810, 0x09811, 0x09884, 0x09886,
0x09b00, 0x09b08,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_pc_cluster_fe_pipe_bv_registers), 8));
/* Block: RB_RAC Cluster: CLUSTER_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_rb_rac_cluster_ps_pipe_br_registers[] = {
0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
0x08898, 0x08898, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rac_cluster_ps_pipe_br_registers), 8));
/* Block: RB_RBP Cluster: CLUSTER_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers[] = {
0x08800, 0x08801, 0x08803, 0x08803, 0x0880b, 0x0880d, 0x08812, 0x08812,
0x08820, 0x08820, 0x08822, 0x08822, 0x08827, 0x08828, 0x0882a, 0x0882a,
0x0882f, 0x08830, 0x08832, 0x08832, 0x08837, 0x08838, 0x0883a, 0x0883a,
0x0883f, 0x08840, 0x08842, 0x08842, 0x08847, 0x08848, 0x0884a, 0x0884a,
0x0884f, 0x08850, 0x08852, 0x08852, 0x08857, 0x08858, 0x0885a, 0x0885a,
0x0885f, 0x0885f, 0x08865, 0x08865, 0x08871, 0x08872, 0x08877, 0x08877,
0x08880, 0x08881, 0x08886, 0x08886, 0x08890, 0x08890, 0x088d0, 0x088e4,
0x088e8, 0x088ea, 0x088f0, 0x088f0, 0x08900, 0x0891a, 0x08927, 0x08928,
0x08c17, 0x08c17, 0x08c20, 0x08c25,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers[] = {
0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7,
0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba,
0x0a9bc, 0x0a9bc, 0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc,
0x0aa00, 0x0aa00, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03,
0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = {
0x0a9b0, 0x0a9b0, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: HLSQ_DP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers[] = {
0x0a9b1, 0x0a9b1, 0x0a9c6, 0x0a9cb, 0x0a9d4, 0x0a9df,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: HLSQ_DP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers[] = {
0x0a9b1, 0x0a9b1, 0x0a9d4, 0x0a9df,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers[] = {
0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
0x0a9ba, 0x0a9bc, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers[] = {
0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9e2, 0x0a9e3,
0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers[] = {
0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf,
0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = {
0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers[] = {
0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a,
0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e,
0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d,
0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05,
0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers[] = {
0x0a800, 0x0a800, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a833, 0x0a835, 0x0a83a, 0x0a83a,
0x0a83c, 0x0a83c, 0x0a83f, 0x0a840, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a88c, 0x0a88e,
0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898, 0x0a89a, 0x0a89d,
0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b,
0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers[] = {
0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831,
0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d,
0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895,
0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05,
0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers[] = {
0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a830, 0x0a831,
0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840, 0x0a85c, 0x0a85d,
0x0a862, 0x0a864, 0x0a870, 0x0a871, 0x0a88d, 0x0a88e, 0x0a893, 0x0a895,
0x0a8a0, 0x0a8af, 0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b,
0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers[] = {
0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833,
0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867,
0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3,
0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers[] = {
0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a830, 0x0a833,
0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861, 0x0a863, 0x0a867,
0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a898, 0x0a8c0, 0x0a8c3,
0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers), 8));
/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers[] = {
0x0b180, 0x0b183, 0x0b190, 0x0b195, 0x0b2c0, 0x0b2d5, 0x0b300, 0x0b307,
0x0b309, 0x0b309, 0x0b310, 0x0b310,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers[] = {
0x0ab00, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_hlsq_state_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: SP_TOP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers[] = {
0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_sp_top_registers), 8));
/* Block: SP Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV Location: uSPTP */
static const u32 gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers[] = {
0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_cluster_sp_ps_pipe_bv_usptp_registers), 8));
/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_BV */
static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers[] = {
0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_bv_registers), 8));
/* Block: TPL1 Cluster: CLUSTER_SP_PS Pipeline: PIPE_LPAC */
static const u32 gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers[] = {
0x0b180, 0x0b181, 0x0b300, 0x0b301, 0x0b307, 0x0b307, 0x0b309, 0x0b309,
0x0b310, 0x0b310,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers), 8));
/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BR */
static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers[] = {
0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers), 8));
/* Block: TPL1 Cluster: CLUSTER_SP_VS Pipeline: PIPE_BV */
static const u32 gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers[] = {
0x0b300, 0x0b307, 0x0b309, 0x0b309, 0x0b310, 0x0b310,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers), 8));
/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BR */
static const u32 gen7_0_0_vfd_cluster_fe_pipe_br_registers[] = {
0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_br_registers), 8));
/* Block: VFD Cluster: CLUSTER_FE Pipeline: PIPE_BV */
static const u32 gen7_0_0_vfd_cluster_fe_pipe_bv_registers[] = {
0x0a000, 0x0a009, 0x0a00e, 0x0a0ef,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vfd_cluster_fe_pipe_bv_registers), 8));
/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BR */
static const u32 gen7_0_0_vpc_cluster_fe_pipe_br_registers[] = {
0x09300, 0x09307,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_br_registers), 8));
/* Block: VPC Cluster: CLUSTER_FE Pipeline: PIPE_BV */
static const u32 gen7_0_0_vpc_cluster_fe_pipe_bv_registers[] = {
0x09300, 0x09307,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_fe_pipe_bv_registers), 8));
/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BR */
static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers[] = {
0x09101, 0x0910c, 0x09300, 0x09307,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers), 8));
/* Block: VPC Cluster: CLUSTER_PC_VS Pipeline: PIPE_BV */
static const u32 gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers[] = {
0x09101, 0x0910c, 0x09300, 0x09307,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers), 8));
/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BR */
static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers[] = {
0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers), 8));
/* Block: VPC Cluster: CLUSTER_VPC_PS Pipeline: PIPE_BV */
static const u32 gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers[] = {
0x09200, 0x0920f, 0x09212, 0x09216, 0x09218, 0x09236, 0x09300, 0x09307,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers), 8));
/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers[] = {
0x0ae52, 0x0ae52, 0x0ae60, 0x0ae67, 0x0ae69, 0x0ae73,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers), 8));
/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: SP_TOP */
static const u32 gen7_0_0_sp_noncontext_pipe_br_sp_top_registers[] = {
0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
0x0ae0f, 0x0ae0f, 0x0ae28, 0x0ae2b, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3f,
0x0ae50, 0x0ae52, 0x0ae80, 0x0aea3,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_sp_top_registers), 8));
/* Block: SP Cluster: noncontext Pipeline: PIPE_BR Location: uSPTP */
static const u32 gen7_0_0_sp_noncontext_pipe_br_usptp_registers[] = {
0x0ae00, 0x0ae00, 0x0ae02, 0x0ae04, 0x0ae06, 0x0ae09, 0x0ae0c, 0x0ae0c,
0x0ae0f, 0x0ae0f, 0x0ae30, 0x0ae32, 0x0ae35, 0x0ae35, 0x0ae3a, 0x0ae3b,
0x0ae3e, 0x0ae3f, 0x0ae50, 0x0ae52,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_br_usptp_registers), 8));
/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: HLSQ_STATE */
static const u32 gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers[] = {
0x0af88, 0x0af8a,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers), 8));
/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: SP_TOP */
static const u32 gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers[] = {
0x0af80, 0x0af84,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers), 8));
/* Block: SP Cluster: noncontext Pipeline: PIPE_LPAC Location: uSPTP */
static const u32 gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers[] = {
0x0af80, 0x0af84, 0x0af90, 0x0af92,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers), 8));
/* Block: TPL1 Cluster: noncontext Pipeline: PIPE_BR */
static const u32 gen7_0_0_tpl1_noncontext_pipe_br_registers[] = {
0x0b600, 0x0b600, 0x0b602, 0x0b602, 0x0b604, 0x0b604, 0x0b608, 0x0b60c,
0x0b60f, 0x0b621, 0x0b630, 0x0b633,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_br_registers), 8));
/* Block: TPL1 Cluster: noncontext Pipeline: PIPE_LPAC */
static const u32 gen7_0_0_tpl1_noncontext_pipe_lpac_registers[] = {
0x0b780, 0x0b780,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_tpl1_noncontext_pipe_lpac_registers), 8));
static const struct gen7_sel_reg gen7_0_0_rb_rac_sel = {
.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
.val = 0x0,
};
static const struct gen7_sel_reg gen7_0_0_rb_rbp_sel = {
.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
.val = 0x9,
};
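/*
 * Per-cluster register lists keyed by pipe and context state. Entries that
 * carry a sel pointer are paired with an RB sub-block select value
 * (RAC = 0x0, RBP = 0x9) defined above.
 */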
static struct gen7_cluster_registers gen7_0_0_clusters[] = {
{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
gen7_0_0_noncontext_pipe_br_registers, },
{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
gen7_0_0_noncontext_pipe_bv_registers, },
{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
gen7_0_0_noncontext_pipe_lpac_registers, },
{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
gen7_0_0_noncontext_rb_rac_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
gen7_0_0_noncontext_rb_rbp_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_gras_cluster_gras_pipe_br_registers, },
{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_gras_cluster_gras_pipe_bv_registers, },
{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_gras_cluster_gras_pipe_br_registers, },
{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_gras_cluster_gras_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_pc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_pc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rac_sel, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_0_0_rb_rbp_sel, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
};
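/*
 * SP/TP cluster lists: each entry gives the statetype register block, pipe,
 * context id, dump location and the base offset of the register block.
 */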
static struct gen7_sptp_cluster_registers gen7_0_0_sptp_clusters[] = {
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP,
gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE,
gen7_0_0_sp_noncontext_pipe_lpac_hlsq_state_registers, 0xaf80 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP,
gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers, 0xaf80 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 },
{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 },
{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
gen7_0_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
gen7_0_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
gen7_0_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
gen7_0_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
gen7_0_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
gen7_0_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
};
static const u32 gen7_0_0_rscc_registers[] = {
0x14000, 0x14036, 0x14040, 0x14042, 0x14080, 0x14084, 0x14089, 0x1408c,
0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac,
0x14100, 0x14102, 0x14114, 0x14119, 0x14124, 0x1412e, 0x14140, 0x14143,
0x14180, 0x14197, 0x14340, 0x14342, 0x14344, 0x14347, 0x1434c, 0x14373,
0x143ec, 0x143ef, 0x143f4, 0x1441b, 0x14494, 0x14497, 0x1449c, 0x144c3,
0x1453c, 0x1453f, 0x14544, 0x1456b, 0x145e4, 0x145e7, 0x145ec, 0x14613,
0x1468c, 0x1468f, 0x14694, 0x146bb, 0x14734, 0x14737, 0x1473c, 0x14763,
0x147dc, 0x147df, 0x147e4, 0x1480b, 0x14884, 0x14887, 0x1488c, 0x148b3,
0x1492c, 0x1492f, 0x14934, 0x1495b, 0x14f51, 0x14f54,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_rscc_registers), 8));
static const u32 gen7_0_0_cpr_registers[] = {
0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c,
0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850,
0x26880, 0x26898, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ac,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_cpr_registers), 8));
static const u32 gen7_0_0_gpucc_registers[] = {
0x24000, 0x2400e, 0x24400, 0x2440e, 0x24800, 0x24805, 0x24c00, 0x24cff,
0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004, 0x26400, 0x26405,
0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26432, 0x26441, 0x26455,
0x26466, 0x26468, 0x26478, 0x2647a, 0x26489, 0x2648a, 0x2649c, 0x2649e,
0x264a0, 0x264a3, 0x264b3, 0x264b5, 0x264c5, 0x264c7, 0x264d6, 0x264d8,
0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2650b, 0x2650c, 0x2651c, 0x2651e,
0x26540, 0x26570, 0x26600, 0x26616, 0x26620, 0x2662d,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_gpucc_registers), 8));
static const u32 gen7_0_0_cx_misc_registers[] = {
0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27832, 0x27857,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_cx_misc_registers), 8));
static const u32 gen7_0_0_dpm_registers[] = {
0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12,
0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_0_0_dpm_registers), 8));
static struct gen7_reg_list gen7_0_0_reg_list[] = {
{ gen7_0_0_gpu_registers, NULL },
{ gen7_0_0_dpm_registers, NULL },
{ NULL, NULL },
};
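/* NULL-terminated set of top-level register lists dumped for this target. */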
static const u32 *gen7_0_0_external_core_regs[] = {
gen7_0_0_gpucc_registers,
gen7_0_0_cpr_registers,
};
#endif /* __ADRENO_GEN7_0_0_SNAPSHOT_H */

File diff suppressed because it is too large

View File

@ -0,0 +1,752 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN7_2_0_SNAPSHOT_H
#define __ADRENO_GEN7_2_0_SNAPSHOT_H
#include "adreno_gen7_snapshot.h"
static const u32 gen7_2_0_debugbus_blocks[] = {
DEBUGBUS_CP_0_0,
DEBUGBUS_CP_0_1,
DEBUGBUS_RBBM,
DEBUGBUS_HLSQ,
DEBUGBUS_UCHE_0,
DEBUGBUS_UCHE_1,
DEBUGBUS_TESS_BR,
DEBUGBUS_TESS_BV,
DEBUGBUS_PC_BR,
DEBUGBUS_PC_BV,
DEBUGBUS_VFDP_BR,
DEBUGBUS_VFDP_BV,
DEBUGBUS_VPC_BR,
DEBUGBUS_VPC_BV,
DEBUGBUS_TSE_BR,
DEBUGBUS_TSE_BV,
DEBUGBUS_RAS_BR,
DEBUGBUS_RAS_BV,
DEBUGBUS_VSC,
DEBUGBUS_COM_0,
DEBUGBUS_LRZ_BR,
DEBUGBUS_LRZ_BV,
DEBUGBUS_UFC_0,
DEBUGBUS_UFC_1,
DEBUGBUS_GMU_GX,
DEBUGBUS_DBGC,
DEBUGBUS_GPC_BR,
DEBUGBUS_GPC_BV,
DEBUGBUS_LARC,
DEBUGBUS_HLSQ_SPTP,
DEBUGBUS_RB_0,
DEBUGBUS_RB_1,
DEBUGBUS_RB_2,
DEBUGBUS_RB_3,
DEBUGBUS_RB_4,
DEBUGBUS_RB_5,
DEBUGBUS_UCHE_WRAPPER,
DEBUGBUS_CCU_0,
DEBUGBUS_CCU_1,
DEBUGBUS_CCU_2,
DEBUGBUS_CCU_3,
DEBUGBUS_CCU_4,
DEBUGBUS_CCU_5,
DEBUGBUS_VFD_BR_0,
DEBUGBUS_VFD_BR_1,
DEBUGBUS_VFD_BR_2,
DEBUGBUS_VFD_BR_3,
DEBUGBUS_VFD_BR_4,
DEBUGBUS_VFD_BR_5,
DEBUGBUS_VFD_BV_0,
DEBUGBUS_VFD_BV_1,
DEBUGBUS_USP_0,
DEBUGBUS_USP_1,
DEBUGBUS_USP_2,
DEBUGBUS_USP_3,
DEBUGBUS_USP_4,
DEBUGBUS_USP_5,
DEBUGBUS_TP_0,
DEBUGBUS_TP_1,
DEBUGBUS_TP_2,
DEBUGBUS_TP_3,
DEBUGBUS_TP_4,
DEBUGBUS_TP_5,
DEBUGBUS_TP_6,
DEBUGBUS_TP_7,
DEBUGBUS_TP_8,
DEBUGBUS_TP_9,
DEBUGBUS_TP_10,
DEBUGBUS_TP_11,
DEBUGBUS_USPTP_0,
DEBUGBUS_USPTP_1,
DEBUGBUS_USPTP_2,
DEBUGBUS_USPTP_3,
DEBUGBUS_USPTP_4,
DEBUGBUS_USPTP_5,
DEBUGBUS_USPTP_6,
DEBUGBUS_USPTP_7,
DEBUGBUS_USPTP_8,
DEBUGBUS_USPTP_9,
DEBUGBUS_USPTP_10,
DEBUGBUS_USPTP_11,
DEBUGBUS_CCHE_0,
DEBUGBUS_CCHE_1,
DEBUGBUS_CCHE_2,
};
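/*
 * Shader memory blocks captured in the gen7_2_0 snapshot, grouped by pipe
 * and dump location.
 */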
static struct gen7_shader_block gen7_2_0_shader_blocks[] = {
{TP0_TMO_DATA, 0x200, 6, 2, PIPE_BR, USPTP},
{TP0_SMO_DATA, 0x80, 6, 2, PIPE_BR, USPTP},
{TP0_MIPMAP_BASE_DATA, 0x3c0, 6, 2, PIPE_BR, USPTP},
{SP_INST_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_INST_DATA_1, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_0_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_1_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_2_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_3_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_4_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_5_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_6_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_7_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_CB_RAM, 0x390, 6, 2, PIPE_BR, USPTP},
{SP_LB_13_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_14_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_INST_TAG, 0xc0, 6, 2, PIPE_BR, USPTP},
{SP_INST_DATA_2, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_TMO_TAG, 0x80, 6, 2, PIPE_BR, USPTP},
{SP_SMO_TAG, 0x80, 6, 2, PIPE_BR, USPTP},
{SP_STATE_DATA, 0x40, 6, 2, PIPE_BR, USPTP},
{SP_HWAVE_RAM, 0x100, 6, 2, PIPE_BR, USPTP},
{SP_L0_INST_BUF, 0x50, 6, 2, PIPE_BR, USPTP},
{SP_LB_8_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_9_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_10_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_11_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{SP_LB_12_DATA, 0x800, 6, 2, PIPE_BR, USPTP},
{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CVS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CPS_BE_CTXT_BUF_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_GFX_CPS_BE_CTXT_BUF_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM, 0x1c0, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM, 0x1c0, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM, 0x300, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM, 0x180, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CVS_RAM_TAG, 0x40, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CHUNK_CPS_RAM_TAG, 0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_ICB_CVS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_ICB_CPS_CB_BASE_TAG, 0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM, 0x280, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM, 0x280, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM, 0x200, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM_1, 0x1c0, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_INST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_INST_RAM, 0x800, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_INST_RAM, 0x200, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CVS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_CPS_MISC_RAM_TAG, 0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_INST_RAM_TAG, 0x80, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM_TAG, 0x38, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM_TAG, 0x64, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM_TAG, 0x10, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CVS_CONST_RAM, 0x800, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_GFX_CPS_CONST_RAM, 0x800, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_INST_RAM_1, 0x800, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_STPROC_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BV_BE_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BV_BE_META, 0x10, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_DATAPATH_META, 0x20, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_FRONTEND_META, 0x80, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_FRONTEND_META, 0x80, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_FRONTEND_META, 0x80, 1, 1, PIPE_LPAC, HLSQ_STATE},
{HLSQ_INDIRECT_META, 0x10, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_BR, HLSQ_STATE},
{HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_BV, HLSQ_STATE},
{HLSQ_BACKEND_META, 0x40, 1, 1, PIPE_LPAC, HLSQ_STATE},
};
static const u32 gen7_2_0_gpu_registers[] = {
0x00000, 0x00000, 0x00002, 0x00002, 0x00011, 0x00012, 0x00016, 0x0001b,
0x0001f, 0x00032, 0x00038, 0x0003c, 0x00042, 0x00042, 0x00044, 0x00044,
0x00047, 0x00047, 0x00049, 0x0004a, 0x0004c, 0x0004c, 0x00050, 0x00050,
0x00056, 0x00056, 0x00073, 0x0007d, 0x000ad, 0x000ae, 0x000b0, 0x000b0,
0x000b4, 0x000b4, 0x000b8, 0x000b8, 0x000bc, 0x000bc, 0x000c0, 0x000c0,
0x000c4, 0x000c4, 0x000c8, 0x000c8, 0x000cc, 0x000cc, 0x000d0, 0x000d0,
0x000d4, 0x000d4, 0x000d8, 0x000d8, 0x000dc, 0x000dc, 0x000e0, 0x000e0,
0x000e4, 0x000e4, 0x000e8, 0x000e8, 0x000ec, 0x000ec, 0x000f0, 0x000f0,
0x000f4, 0x000f4, 0x000f8, 0x000f8, 0x00100, 0x00100, 0x00104, 0x0010c,
0x0010f, 0x0011d, 0x0012f, 0x0012f, 0x00200, 0x0020d, 0x00211, 0x00211,
0x00215, 0x00253, 0x00260, 0x00270, 0x00272, 0x00274, 0x00281, 0x0028d,
0x00300, 0x00401, 0x00410, 0x00451, 0x00460, 0x004a3, 0x004c0, 0x004d1,
0x00500, 0x00500, 0x00507, 0x0050b, 0x0050f, 0x0050f, 0x00511, 0x00511,
0x00533, 0x00536, 0x00540, 0x00555, 0x00564, 0x00567, 0x00574, 0x00577,
0x00584, 0x0059b, 0x005fb, 0x005ff, 0x00800, 0x00808, 0x00810, 0x00813,
0x00820, 0x00821, 0x00823, 0x00827, 0x00830, 0x00834, 0x0083f, 0x00841,
0x00843, 0x00847, 0x0084f, 0x00886, 0x008a0, 0x008ab, 0x008c0, 0x008c0,
0x008c4, 0x008c6, 0x008d0, 0x008dd, 0x008e0, 0x008e6, 0x008f0, 0x008f3,
0x00900, 0x00903, 0x00908, 0x00911, 0x00928, 0x0093e, 0x00942, 0x0094d,
0x00980, 0x00984, 0x0098d, 0x0098f, 0x009b0, 0x009b4, 0x009c2, 0x009c9,
0x009ce, 0x009d7, 0x009e0, 0x009e7, 0x00a00, 0x00a00, 0x00a02, 0x00a03,
0x00a10, 0x00a4f, 0x00a61, 0x00a9f, 0x00ad0, 0x00adb, 0x00b00, 0x00b31,
0x00b35, 0x00b3c, 0x00b40, 0x00b40, 0x00c00, 0x00c00, 0x00c02, 0x00c04,
0x00c06, 0x00c06, 0x00c10, 0x00cd9, 0x00ce0, 0x00d0c, 0x00df0, 0x00df4,
0x00e01, 0x00e02, 0x00e07, 0x00e0e, 0x00e10, 0x00e13, 0x00e17, 0x00e19,
0x00e1b, 0x00e2b, 0x00e30, 0x00e32, 0x00e38, 0x00e3c, 0x00e40, 0x00e4b,
0x0ec00, 0x0ec01, 0x0ec05, 0x0ec05, 0x0ec07, 0x0ec07, 0x0ec0a, 0x0ec0a,
0x0ec12, 0x0ec12, 0x0ec26, 0x0ec28, 0x0ec2b, 0x0ec2d, 0x0ec2f, 0x0ec2f,
0x0ec40, 0x0ec41, 0x0ec45, 0x0ec45, 0x0ec47, 0x0ec47, 0x0ec4a, 0x0ec4a,
0x0ec52, 0x0ec52, 0x0ec66, 0x0ec68, 0x0ec6b, 0x0ec6d, 0x0ec6f, 0x0ec6f,
0x0ec80, 0x0ec81, 0x0ec85, 0x0ec85, 0x0ec87, 0x0ec87, 0x0ec8a, 0x0ec8a,
0x0ec92, 0x0ec92, 0x0eca6, 0x0eca8, 0x0ecab, 0x0ecad, 0x0ecaf, 0x0ecaf,
0x0ecc0, 0x0ecc1, 0x0ecc5, 0x0ecc5, 0x0ecc7, 0x0ecc7, 0x0ecca, 0x0ecca,
0x0ecd2, 0x0ecd2, 0x0ece6, 0x0ece8, 0x0eceb, 0x0eced, 0x0ecef, 0x0ecef,
0x0ed00, 0x0ed01, 0x0ed05, 0x0ed05, 0x0ed07, 0x0ed07, 0x0ed0a, 0x0ed0a,
0x0ed12, 0x0ed12, 0x0ed26, 0x0ed28, 0x0ed2b, 0x0ed2d, 0x0ed2f, 0x0ed2f,
0x0ed40, 0x0ed41, 0x0ed45, 0x0ed45, 0x0ed47, 0x0ed47, 0x0ed4a, 0x0ed4a,
0x0ed52, 0x0ed52, 0x0ed66, 0x0ed68, 0x0ed6b, 0x0ed6d, 0x0ed6f, 0x0ed6f,
0x0ed80, 0x0ed81, 0x0ed85, 0x0ed85, 0x0ed87, 0x0ed87, 0x0ed8a, 0x0ed8a,
0x0ed92, 0x0ed92, 0x0eda6, 0x0eda8, 0x0edab, 0x0edad, 0x0edaf, 0x0edaf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_gpu_registers), 8));
static const u32 gen7_2_0_gmu_registers[] = {
0x10001, 0x10001, 0x10003, 0x10003, 0x10401, 0x10401, 0x10403, 0x10403,
0x10801, 0x10801, 0x10803, 0x10803, 0x10c01, 0x10c01, 0x10c03, 0x10c03,
0x11001, 0x11001, 0x11003, 0x11003, 0x11401, 0x11401, 0x11403, 0x11403,
0x11801, 0x11801, 0x11803, 0x11803, 0x11c01, 0x11c01, 0x11c03, 0x11c03,
0x1a79b, 0x1a79b, 0x1a7ac, 0x1a7b9, 0x1a7dc, 0x1a7dd, 0x1a7e0, 0x1a7e1,
0x1a803, 0x1a803, 0x1a805, 0x1a806, 0x1a84e, 0x1a84e, 0x1a856, 0x1a856,
0x1f400, 0x1f40d, 0x1f40f, 0x1f411, 0x1f500, 0x1f500, 0x1f507, 0x1f507,
0x1f509, 0x1f50b, 0x1f700, 0x1f701, 0x1f704, 0x1f706, 0x1f708, 0x1f709,
0x1f70c, 0x1f70d, 0x1f710, 0x1f711, 0x1f713, 0x1f716, 0x1f720, 0x1f724,
0x1f729, 0x1f729, 0x1f730, 0x1f747, 0x1f760, 0x1f761, 0x1f764, 0x1f76b,
0x1f800, 0x1f804, 0x1f807, 0x1f808, 0x1f80b, 0x1f80c, 0x1f80f, 0x1f80f,
0x1f811, 0x1f811, 0x1f813, 0x1f817, 0x1f819, 0x1f81c, 0x1f824, 0x1f82a,
0x1f82d, 0x1f830, 0x1f840, 0x1f853, 0x1f860, 0x1f860, 0x1f862, 0x1f864,
0x1f868, 0x1f868, 0x1f870, 0x1f879, 0x1f87f, 0x1f87f, 0x1f888, 0x1f889,
0x1f8a0, 0x1f8a2, 0x1f890, 0x1f892, 0x1f894, 0x1f896, 0x1f8a4, 0x1f8af,
0x1f8b8, 0x1f8b9, 0x1f8c0, 0x1f8c1, 0x1f8c3, 0x1f8c4, 0x1f8d0, 0x1f8d0,
0x1f8ec, 0x1f8ec, 0x1f8f0, 0x1f8f1, 0x1f910, 0x1f917, 0x1f920, 0x1f921,
0x1f924, 0x1f925, 0x1f928, 0x1f929, 0x1f92c, 0x1f92d, 0x1f940, 0x1f940,
0x1f942, 0x1f944, 0x1f948, 0x1f94a, 0x1f94f, 0x1f951, 0x1f954, 0x1f955,
0x1f958, 0x1f95a, 0x1f95d, 0x1f95d, 0x1f962, 0x1f96b, 0x1f970, 0x1f979,
0x1f97c, 0x1f97c, 0x1f980, 0x1f981, 0x1f984, 0x1f986, 0x1f992, 0x1f993,
0x1f996, 0x1f99e, 0x1f9c0, 0x1f9c0, 0x1f9c5, 0x1f9d4, 0x1f9f0, 0x1f9f1,
0x1f9f8, 0x1f9fa, 0x1f9fc, 0x1f9fc, 0x1fa00, 0x1fa03, 0x20000, 0x20012,
0x20018, 0x20018, 0x2001a, 0x2001a, 0x20020, 0x20024, 0x20030, 0x20031,
0x20034, 0x20036, 0x23801, 0x23801, 0x23803, 0x23803, 0x23805, 0x23805,
0x23807, 0x23807, 0x23809, 0x23809, 0x2380b, 0x2380b, 0x2380d, 0x2380d,
0x2380f, 0x2380f, 0x23811, 0x23811, 0x23813, 0x23813, 0x23815, 0x23815,
0x23817, 0x23817, 0x23819, 0x23819, 0x2381b, 0x2381b, 0x2381d, 0x2381d,
0x2381f, 0x23820, 0x23822, 0x23822, 0x23824, 0x23824, 0x23826, 0x23826,
0x23828, 0x23828, 0x2382a, 0x2382a, 0x2382c, 0x2382c, 0x2382e, 0x2382e,
0x23830, 0x23830, 0x23832, 0x23832, 0x23834, 0x23834, 0x23836, 0x23836,
0x23838, 0x23838, 0x2383a, 0x2383a, 0x2383c, 0x2383c, 0x2383e, 0x2383e,
0x23840, 0x23847, 0x23b00, 0x23b01, 0x23b03, 0x23b03, 0x23b05, 0x23b0e,
0x23b10, 0x23b13, 0x23b15, 0x23b16, 0x23b20, 0x23b20, 0x23b28, 0x23b28,
0x23b30, 0x23b30,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_gmu_registers), 8));
static const u32 gen7_2_0_gmugx_registers[] = {
0x1a400, 0x1a41f, 0x1a440, 0x1a45f, 0x1a480, 0x1a49f, 0x1a4c0, 0x1a4df,
0x1a500, 0x1a51f, 0x1a540, 0x1a55f, 0x1a580, 0x1a59f, 0x1a5c0, 0x1a5df,
0x1a600, 0x1a61f, 0x1a640, 0x1a65f, 0x1a780, 0x1a781, 0x1a783, 0x1a785,
0x1a787, 0x1a789, 0x1a78b, 0x1a78d, 0x1a78f, 0x1a791, 0x1a793, 0x1a795,
0x1a797, 0x1a799, 0x1a79c, 0x1a79d, 0x1a79f, 0x1a79f, 0x1a7a0, 0x1a7a1,
0x1a7a3, 0x1a7a3, 0x1a7a8, 0x1a7ab, 0x1a7c0, 0x1a7c1, 0x1a7c4, 0x1a7c5,
0x1a7c8, 0x1a7c9, 0x1a7cc, 0x1a7cd, 0x1a7d0, 0x1a7d1, 0x1a7d4, 0x1a7d5,
0x1a7d8, 0x1a7d9, 0x1a7fc, 0x1a7fd, 0x1a800, 0x1a802, 0x1a804, 0x1a804,
0x1a816, 0x1a816, 0x1a81e, 0x1a81e, 0x1a826, 0x1a826, 0x1a82e, 0x1a82e,
0x1a836, 0x1a836, 0x1a83e, 0x1a83e, 0x1a846, 0x1a846, 0x1a860, 0x1a862,
0x1a864, 0x1a867, 0x1a870, 0x1a870, 0x1a883, 0x1a884, 0x1a8c0, 0x1a8c2,
0x1a8c4, 0x1a8c7, 0x1a8d0, 0x1a8d3, 0x1a900, 0x1a92b, 0x1a940, 0x1a940,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_gmugx_registers), 8));
static const u32 gen7_2_0_noncontext_pipe_br_registers[] = {
0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
0x0a630, 0x0a631, 0x0a638, 0x0a63c,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_pipe_br_registers), 8));
static const u32 gen7_2_0_noncontext_pipe_bv_registers[] = {
0x00887, 0x0088c, 0x08600, 0x08600, 0x08602, 0x08602, 0x08610, 0x0861b,
0x08620, 0x08620, 0x08630, 0x08630, 0x08637, 0x08639, 0x08640, 0x08640,
0x09600, 0x09600, 0x09602, 0x09603, 0x0960a, 0x09616, 0x09624, 0x0963a,
0x09640, 0x09640, 0x09e00, 0x09e00, 0x09e02, 0x09e07, 0x09e0a, 0x09e16,
0x09e19, 0x09e19, 0x09e1c, 0x09e1c, 0x09e20, 0x09e25, 0x09e30, 0x09e31,
0x09e40, 0x09e51, 0x09e64, 0x09e64, 0x09e70, 0x09e72, 0x09e78, 0x09e79,
0x09e80, 0x09fff, 0x0a600, 0x0a600, 0x0a603, 0x0a603, 0x0a610, 0x0a61f,
0x0a630, 0x0a631, 0x0a638, 0x0a63c,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_pipe_bv_registers), 8));
static const u32 gen7_2_0_noncontext_rb_rac_pipe_br_registers[] = {
0x08e10, 0x08e1c, 0x08e20, 0x08e25, 0x08e51, 0x08e5a, 0x08ea0, 0x08ea3,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_rb_rac_pipe_br_registers), 8));
static const u32 gen7_2_0_noncontext_rb_rbp_pipe_br_registers[] = {
0x08e01, 0x08e01, 0x08e04, 0x08e04, 0x08e06, 0x08e09, 0x08e0c, 0x08e0c,
0x08e28, 0x08e28, 0x08e2c, 0x08e35, 0x08e3b, 0x08e40, 0x08e50, 0x08e50,
0x08e5b, 0x08e5d, 0x08e5f, 0x08e5f, 0x08e61, 0x08e61, 0x08e63, 0x08e66,
0x08e68, 0x08e69, 0x08e70, 0x08e79, 0x08e80, 0x08e8f,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_noncontext_rb_rbp_pipe_br_registers), 8));
static const u32 gen7_2_0_gras_cluster_gras_pipe_br_registers[] = {
0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f,
0x08400, 0x08406, 0x0840a, 0x0840b,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_gras_cluster_gras_pipe_br_registers), 8));
static const u32 gen7_2_0_gras_cluster_gras_pipe_bv_registers[] = {
0x08000, 0x0800c, 0x08010, 0x08092, 0x08094, 0x08099, 0x0809b, 0x0809d,
0x080a0, 0x080a7, 0x080af, 0x080f1, 0x080f4, 0x080f6, 0x080f8, 0x080fa,
0x08100, 0x08107, 0x08109, 0x0810b, 0x08110, 0x08113, 0x08120, 0x0813f,
0x08400, 0x08406, 0x0840a, 0x0840b,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_gras_cluster_gras_pipe_bv_registers), 8));
static const u32 gen7_2_0_rb_rac_cluster_ps_pipe_br_registers[] = {
0x08802, 0x08802, 0x08804, 0x08806, 0x08809, 0x0880a, 0x0880e, 0x08811,
0x08818, 0x0881e, 0x08821, 0x08821, 0x08823, 0x08826, 0x08829, 0x08829,
0x0882b, 0x0882e, 0x08831, 0x08831, 0x08833, 0x08836, 0x08839, 0x08839,
0x0883b, 0x0883e, 0x08841, 0x08841, 0x08843, 0x08846, 0x08849, 0x08849,
0x0884b, 0x0884e, 0x08851, 0x08851, 0x08853, 0x08856, 0x08859, 0x08859,
0x0885b, 0x0885e, 0x08860, 0x08864, 0x08870, 0x08870, 0x08873, 0x08876,
0x08878, 0x08879, 0x08882, 0x08885, 0x08887, 0x08889, 0x08891, 0x08891,
0x08898, 0x08899, 0x088c0, 0x088c1, 0x088e5, 0x088e5, 0x088f4, 0x088f5,
0x08a00, 0x08a05, 0x08a10, 0x08a15, 0x08a20, 0x08a25, 0x08a30, 0x08a35,
0x08c00, 0x08c01, 0x08c18, 0x08c1f, 0x08c26, 0x08c34,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_rb_rac_cluster_ps_pipe_br_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers[] = {
0x0a980, 0x0a984, 0x0a99e, 0x0a99e, 0x0a9a7, 0x0a9a7, 0x0a9aa, 0x0a9aa,
0x0a9ae, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e0, 0x0a9fc, 0x0aa00, 0x0aa00,
0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab03, 0x0ab05, 0x0ab05,
0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers[] = {
0x0a980, 0x0a980, 0x0a982, 0x0a984, 0x0a99e, 0x0a9a2, 0x0a9a7, 0x0a9a8,
0x0a9aa, 0x0a9aa, 0x0a9ae, 0x0a9ae, 0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5,
0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5, 0x0a9e0, 0x0a9f9, 0x0aa00, 0x0aa01,
0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b,
0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers[] = {
0x0a980, 0x0a982, 0x0a985, 0x0a9a6, 0x0a9a8, 0x0a9a9, 0x0a9ab, 0x0a9ae,
0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9bf, 0x0a9c2, 0x0a9c3,
0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa01, 0x0aa01,
0x0aa30, 0x0aa31, 0x0aa40, 0x0aabf, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22,
0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers[] = {
0x0a9b0, 0x0a9b0, 0x0a9b2, 0x0a9b5, 0x0a9ba, 0x0a9ba, 0x0a9bc, 0x0a9bc,
0x0a9c4, 0x0a9c4, 0x0a9cd, 0x0a9cd, 0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9fc,
0x0aa00, 0x0aa00, 0x0aa31, 0x0aa31, 0x0ab00, 0x0ab01,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers[] = {
0x0a9b0, 0x0a9b1, 0x0a9b3, 0x0a9b5, 0x0a9ba, 0x0a9bc, 0x0a9c5, 0x0a9c5,
0x0a9e2, 0x0a9e3, 0x0a9e6, 0x0a9f9, 0x0aa00, 0x0aa00, 0x0ab00, 0x0ab00,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers[] = {
0x0a9b0, 0x0a9b3, 0x0a9b6, 0x0a9b9, 0x0a9bb, 0x0a9be, 0x0a9c2, 0x0a9c3,
0x0a9c5, 0x0a9c5, 0x0a9cd, 0x0a9cd, 0x0a9d0, 0x0a9d3, 0x0aa31, 0x0aa31,
0x0ab00, 0x0ab01,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers[] = {
0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab03,
0x0ab05, 0x0ab05, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers[] = {
0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab04, 0x0ab05, 0x0ab0a, 0x0ab1b,
0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers[] = {
0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab05, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers[] = {
0x0a800, 0x0a801, 0x0a81b, 0x0a81d, 0x0a822, 0x0a822, 0x0a824, 0x0a824,
0x0a827, 0x0a82a, 0x0a830, 0x0a830, 0x0a832, 0x0a835, 0x0a83a, 0x0a83a,
0x0a83c, 0x0a83c, 0x0a83f, 0x0a841, 0x0a85b, 0x0a85d, 0x0a862, 0x0a862,
0x0a864, 0x0a864, 0x0a867, 0x0a867, 0x0a870, 0x0a870, 0x0a872, 0x0a872,
0x0a88c, 0x0a88e, 0x0a893, 0x0a893, 0x0a895, 0x0a895, 0x0a898, 0x0a898,
0x0a89a, 0x0a89d, 0x0a8a0, 0x0a8af, 0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02,
0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers[] = {
0x0a800, 0x0a800, 0x0a81c, 0x0a81d, 0x0a822, 0x0a824, 0x0a82d, 0x0a82d,
0x0a82f, 0x0a831, 0x0a834, 0x0a835, 0x0a83a, 0x0a83c, 0x0a840, 0x0a840,
0x0a85c, 0x0a85d, 0x0a862, 0x0a864, 0x0a868, 0x0a868, 0x0a870, 0x0a871,
0x0a88d, 0x0a88e, 0x0a893, 0x0a895, 0x0a899, 0x0a899, 0x0a8a0, 0x0a8af,
0x0ab00, 0x0ab00, 0x0ab02, 0x0ab02, 0x0ab0a, 0x0ab1b, 0x0ab20, 0x0ab20,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers), 8));
static const u32 gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers[] = {
0x0a800, 0x0a81b, 0x0a81e, 0x0a821, 0x0a823, 0x0a827, 0x0a82d, 0x0a82d,
0x0a82f, 0x0a833, 0x0a836, 0x0a839, 0x0a83b, 0x0a85b, 0x0a85e, 0x0a861,
0x0a863, 0x0a868, 0x0a870, 0x0a88c, 0x0a88f, 0x0a892, 0x0a894, 0x0a899,
0x0a8c0, 0x0a8c3, 0x0ab00, 0x0ab02, 0x0ab21, 0x0ab22, 0x0ab40, 0x0abbf,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers), 8));
static const u32 gen7_2_0_sp_noncontext_pipe_lpac_hlsq_state_registers[] = {
0x0af88, 0x0af8b,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_sp_noncontext_pipe_lpac_hlsq_state_registers), 8));
static const struct gen7_sel_reg gen7_2_0_rb_rac_sel = {
.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
.val = 0x0,
};
static const struct gen7_sel_reg gen7_2_0_rb_rbp_sel = {
.host_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_HOST,
.cd_reg = GEN7_RB_SUB_BLOCK_SEL_CNTL_CD,
.val = 0x9,
};
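/*
 * gen7_2_0 reuses the gen7_0_0 lists wherever the register layout is
 * unchanged; only the blocks that differ get dedicated gen7_2_0 tables.
 */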
static struct gen7_cluster_registers gen7_2_0_clusters[] = {
{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
gen7_2_0_noncontext_pipe_br_registers, },
{ CLUSTER_NONE, PIPE_BV, STATE_NON_CONTEXT,
gen7_2_0_noncontext_pipe_bv_registers, },
{ CLUSTER_NONE, PIPE_LPAC, STATE_NON_CONTEXT,
gen7_0_0_noncontext_pipe_lpac_registers, },
{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
gen7_2_0_noncontext_rb_rac_pipe_br_registers, &gen7_2_0_rb_rac_sel, },
{ CLUSTER_NONE, PIPE_BR, STATE_NON_CONTEXT,
gen7_2_0_noncontext_rb_rbp_pipe_br_registers, &gen7_2_0_rb_rbp_sel, },
{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_2_0_gras_cluster_gras_pipe_br_registers, },
{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_2_0_gras_cluster_gras_pipe_bv_registers, },
{ CLUSTER_GRAS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_2_0_gras_cluster_gras_pipe_br_registers, },
{ CLUSTER_GRAS, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_2_0_gras_cluster_gras_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_pc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_pc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_pc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_2_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rac_sel, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_2_0_rb_rac_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rac_sel, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rbp_sel, },
{ CLUSTER_PS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_rb_rbp_cluster_ps_pipe_br_registers, &gen7_2_0_rb_rbp_sel, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vfd_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vfd_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_FE, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_fe_pipe_br_registers, },
{ CLUSTER_FE, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_fe_pipe_bv_registers, },
{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
{ CLUSTER_PC_VS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_pc_vs_pipe_br_registers, },
{ CLUSTER_PC_VS, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_pc_vs_pipe_bv_registers, },
{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_0,
gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
{ CLUSTER_VPC_PS, PIPE_BR, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_vpc_ps_pipe_br_registers, },
{ CLUSTER_VPC_PS, PIPE_BV, STATE_FORCE_CTXT_1,
gen7_0_0_vpc_cluster_vpc_ps_pipe_bv_registers, },
};
static struct gen7_sptp_cluster_registers gen7_2_0_sptp_clusters[] = {
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, HLSQ_STATE,
gen7_0_0_sp_noncontext_pipe_br_hlsq_state_registers, 0xae00 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, SP_TOP,
gen7_0_0_sp_noncontext_pipe_br_sp_top_registers, 0xae00 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_BR, 0, USPTP,
gen7_0_0_sp_noncontext_pipe_br_usptp_registers, 0xae00 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, HLSQ_STATE,
gen7_2_0_sp_noncontext_pipe_lpac_hlsq_state_registers, 0xaf80 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, SP_TOP,
gen7_0_0_sp_noncontext_pipe_lpac_sp_top_registers, 0xaf80 },
{ CLUSTER_NONE, SP_NCTX_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_sp_noncontext_pipe_lpac_usptp_registers, 0xaf80 },
{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_BR, 0, USPTP,
gen7_0_0_tpl1_noncontext_pipe_br_registers, 0xb600 },
{ CLUSTER_NONE, TP0_NCTX_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_tpl1_noncontext_pipe_lpac_registers, 0xb780 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, SP_TOP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_ps_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, SP_TOP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_br_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, SP_TOP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, SP_TOP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
gen7_2_0_sp_cluster_sp_ps_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_ps_pipe_lpac_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, HLSQ_DP,
gen7_0_0_sp_cluster_sp_ps_pipe_lpac_hlsq_dp_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, SP_TOP,
gen7_2_0_sp_cluster_sp_ps_pipe_lpac_sp_top_registers, 0xa800 },
{ CLUSTER_SP_PS, SP_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
gen7_2_0_sp_cluster_sp_ps_pipe_lpac_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, SP_TOP,
gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, SP_TOP,
gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_vs_pipe_br_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, HLSQ_STATE,
gen7_2_0_sp_cluster_sp_vs_pipe_bv_hlsq_state_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, SP_TOP,
gen7_2_0_sp_cluster_sp_vs_pipe_br_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, SP_TOP,
gen7_2_0_sp_cluster_sp_vs_pipe_bv_sp_top_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
gen7_2_0_sp_cluster_sp_vs_pipe_br_usptp_registers, 0xa800 },
{ CLUSTER_SP_VS, SP_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
gen7_2_0_sp_cluster_sp_vs_pipe_bv_usptp_registers, 0xa800 },
{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_BR, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX1_3D_CPS_REG, PIPE_BR, 1, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX2_3D_CPS_REG, PIPE_BR, 2, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX3_3D_CPS_REG, PIPE_BR, 3, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_PS, TP0_CTX0_3D_CPS_REG, PIPE_LPAC, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_ps_pipe_lpac_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BR, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX0_3D_CVS_REG, PIPE_BV, 0, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BR, 1, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_br_registers, 0xb000 },
{ CLUSTER_SP_VS, TP0_CTX1_3D_CVS_REG, PIPE_BV, 1, USPTP,
gen7_0_0_tpl1_cluster_sp_vs_pipe_bv_registers, 0xb000 },
};
static const u32 gen7_2_0_dbgc_registers[] = {
0x005ff, 0x0061c, 0x0061e, 0x00634, 0x00640, 0x0065e, 0x00679, 0x0067e,
0x00699, 0x00699, 0x0069b, 0x0069e, 0x006a0, 0x006a3, 0x006c0, 0x006c1,
0x18400, 0x1841c, 0x1841e, 0x18434, 0x18440, 0x1845c, 0x18479, 0x1847c,
0x18580, 0x18581,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_dbgc_registers), 8));
static const u32 gen7_2_0_rscc_registers[] = {
0x14000, 0x14036, 0x14040, 0x14047, 0x14080, 0x14084, 0x14089, 0x1408c,
0x14091, 0x14094, 0x14099, 0x1409c, 0x140a1, 0x140a4, 0x140a9, 0x140ac,
0x14100, 0x14104, 0x14114, 0x14119, 0x14124, 0x14132, 0x14154, 0x1416b,
0x14340, 0x14342, 0x14344, 0x1437c, 0x143f0, 0x143f8, 0x143fa, 0x143fe,
0x14400, 0x14404, 0x14406, 0x1440a, 0x1440c, 0x14410, 0x14412, 0x14416,
0x14418, 0x1441c, 0x1441e, 0x14422, 0x14424, 0x14424, 0x14498, 0x144a0,
0x144a2, 0x144a6, 0x144a8, 0x144ac, 0x144ae, 0x144b2, 0x144b4, 0x144b8,
0x144ba, 0x144be, 0x144c0, 0x144c4, 0x144c6, 0x144ca, 0x144cc, 0x144cc,
0x14540, 0x14548, 0x1454a, 0x1454e, 0x14550, 0x14554, 0x14556, 0x1455a,
0x1455c, 0x14560, 0x14562, 0x14566, 0x14568, 0x1456c, 0x1456e, 0x14572,
0x14574, 0x14574, 0x145e8, 0x145f0, 0x145f2, 0x145f6, 0x145f8, 0x145fc,
0x145fe, 0x14602, 0x14604, 0x14608, 0x1460a, 0x1460e, 0x14610, 0x14614,
0x14616, 0x1461a, 0x1461c, 0x1461c, 0x14690, 0x14698, 0x1469a, 0x1469e,
0x146a0, 0x146a4, 0x146a6, 0x146aa, 0x146ac, 0x146b0, 0x146b2, 0x146b6,
0x146b8, 0x146bc, 0x146be, 0x146c2, 0x146c4, 0x146c4, 0x14738, 0x14740,
0x14742, 0x14746, 0x14748, 0x1474c, 0x1474e, 0x14752, 0x14754, 0x14758,
0x1475a, 0x1475e, 0x14760, 0x14764, 0x14766, 0x1476a, 0x1476c, 0x1476c,
0x147e0, 0x147e8, 0x147ea, 0x147ee, 0x147f0, 0x147f4, 0x147f6, 0x147fa,
0x147fc, 0x14800, 0x14802, 0x14806, 0x14808, 0x1480c, 0x1480e, 0x14812,
0x14814, 0x14814, 0x14888, 0x14890, 0x14892, 0x14896, 0x14898, 0x1489c,
0x1489e, 0x148a2, 0x148a4, 0x148a8, 0x148aa, 0x148ae, 0x148b0, 0x148b4,
0x148b6, 0x148ba, 0x148bc, 0x148bc, 0x14930, 0x14938, 0x1493a, 0x1493e,
0x14940, 0x14944, 0x14946, 0x1494a, 0x1494c, 0x14950, 0x14952, 0x14956,
0x14958, 0x1495c, 0x1495e, 0x14962, 0x14964, 0x14964,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_rscc_registers), 8));
static const u32 gen7_2_0_cpr_registers[] = {
0x26800, 0x26805, 0x26808, 0x2680c, 0x26814, 0x26814, 0x2681c, 0x2681c,
0x26820, 0x26838, 0x26840, 0x26840, 0x26848, 0x26848, 0x26850, 0x26850,
0x26880, 0x2689e, 0x26980, 0x269b0, 0x269c0, 0x269c8, 0x269e0, 0x269ee,
0x269fb, 0x269ff, 0x26a02, 0x26a07, 0x26a09, 0x26a0b, 0x26a10, 0x26b0f,
0x27440, 0x27441, 0x27444, 0x27444, 0x27480, 0x274a2, 0x274ac, 0x274ad,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_cpr_registers), 8));
static const u32 gen7_2_0_dpm_lkg_registers[] = {
0x21c00, 0x21c00, 0x21c08, 0x21c09, 0x21c0e, 0x21c0f, 0x21c4f, 0x21c50,
0x21c52, 0x21c52, 0x21c54, 0x21c56, 0x21c58, 0x21c5a, 0x21c5c, 0x21c60,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_lkg_registers), 8));
static const u32 gen7_2_0_gpucc_registers[] = {
0x24000, 0x2400f, 0x24400, 0x2440f, 0x24800, 0x24805, 0x24c00, 0x24cff,
0x25400, 0x25404, 0x25800, 0x25804, 0x25c00, 0x25c04, 0x26000, 0x26004,
0x26400, 0x26405, 0x26414, 0x2641d, 0x2642a, 0x26430, 0x26432, 0x26433,
0x26441, 0x2644b, 0x2644d, 0x26457, 0x26466, 0x26468, 0x26478, 0x2647a,
0x26489, 0x2648a, 0x2649c, 0x2649e, 0x264a0, 0x264a4, 0x264c5, 0x264c7,
0x264d6, 0x264d8, 0x264e8, 0x264e9, 0x264f9, 0x264fc, 0x2651c, 0x2651e,
0x26540, 0x26576, 0x26600, 0x26616, 0x26620, 0x2662d, 0x26630, 0x26631,
0x26635, 0x26635, 0x26637, 0x26637, 0x2663a, 0x2663a, 0x26642, 0x26642,
0x26656, 0x26658, 0x2665b, 0x2665d, 0x2665f, 0x26662,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_gpucc_registers), 8));
static const u32 gen7_2_0_cx_misc_registers[] = {
0x27800, 0x27800, 0x27810, 0x27814, 0x27820, 0x27824, 0x27832, 0x27857,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_cx_misc_registers), 8));
static const u32 gen7_2_0_dpm_registers[] = {
0x1aa00, 0x1aa06, 0x1aa09, 0x1aa0a, 0x1aa0c, 0x1aa0d, 0x1aa0f, 0x1aa12,
0x1aa14, 0x1aa47, 0x1aa50, 0x1aa51,
UINT_MAX, UINT_MAX,
};
static_assert(IS_ALIGNED(sizeof(gen7_2_0_dpm_registers), 8));
static struct gen7_reg_list gen7_2_0_reg_list[] = {
{ gen7_2_0_gpu_registers, NULL },
{ gen7_2_0_dpm_registers, NULL },
{ gen7_2_0_dbgc_registers, NULL },
{ NULL, NULL },
};
static const u32 *gen7_2_0_external_core_regs[] = {
gen7_2_0_gpucc_registers,
gen7_2_0_cpr_registers,
gen7_2_0_dpm_lkg_registers,
};
#endif /* __ADRENO_GEN7_2_0_SNAPSHOT_H */

File diff suppressed because it is too large

View File

@ -0,0 +1,473 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/amba/bus.h>
#include "adreno.h"
#include "adreno_gen7.h"
#include "adreno_coresight.h"
static struct adreno_coresight_register gen7_coresight_regs[] = {
{ GEN7_DBGC_CFG_DBGBUS_SEL_A },
{ GEN7_DBGC_CFG_DBGBUS_SEL_B },
{ GEN7_DBGC_CFG_DBGBUS_SEL_C },
{ GEN7_DBGC_CFG_DBGBUS_SEL_D },
{ GEN7_DBGC_CFG_DBGBUS_CNTLT },
{ GEN7_DBGC_CFG_DBGBUS_CNTLM },
{ GEN7_DBGC_CFG_DBGBUS_OPL },
{ GEN7_DBGC_CFG_DBGBUS_OPE },
{ GEN7_DBGC_CFG_DBGBUS_IVTL_0 },
{ GEN7_DBGC_CFG_DBGBUS_IVTL_1 },
{ GEN7_DBGC_CFG_DBGBUS_IVTL_2 },
{ GEN7_DBGC_CFG_DBGBUS_IVTL_3 },
{ GEN7_DBGC_CFG_DBGBUS_MASKL_0 },
{ GEN7_DBGC_CFG_DBGBUS_MASKL_1 },
{ GEN7_DBGC_CFG_DBGBUS_MASKL_2 },
{ GEN7_DBGC_CFG_DBGBUS_MASKL_3 },
{ GEN7_DBGC_CFG_DBGBUS_BYTEL_0 },
{ GEN7_DBGC_CFG_DBGBUS_BYTEL_1 },
{ GEN7_DBGC_CFG_DBGBUS_IVTE_0 },
{ GEN7_DBGC_CFG_DBGBUS_IVTE_1 },
{ GEN7_DBGC_CFG_DBGBUS_IVTE_2 },
{ GEN7_DBGC_CFG_DBGBUS_IVTE_3 },
{ GEN7_DBGC_CFG_DBGBUS_MASKE_0 },
{ GEN7_DBGC_CFG_DBGBUS_MASKE_1 },
{ GEN7_DBGC_CFG_DBGBUS_MASKE_2 },
{ GEN7_DBGC_CFG_DBGBUS_MASKE_3 },
{ GEN7_DBGC_CFG_DBGBUS_NIBBLEE },
{ GEN7_DBGC_CFG_DBGBUS_PTRC0 },
{ GEN7_DBGC_CFG_DBGBUS_PTRC1 },
{ GEN7_DBGC_CFG_DBGBUS_LOADREG },
{ GEN7_DBGC_CFG_DBGBUS_IDX },
{ GEN7_DBGC_CFG_DBGBUS_CLRC },
{ GEN7_DBGC_CFG_DBGBUS_LOADIVT },
{ GEN7_DBGC_VBIF_DBG_CNTL },
{ GEN7_DBGC_DBG_LO_HI_GPIO },
{ GEN7_DBGC_EXT_TRACE_BUS_CNTL },
{ GEN7_DBGC_READ_AHB_THROUGH_DBG },
{ GEN7_DBGC_CFG_DBGBUS_TRACE_BUF1 },
{ GEN7_DBGC_CFG_DBGBUS_TRACE_BUF2 },
{ GEN7_DBGC_EVT_CFG },
{ GEN7_DBGC_EVT_INTF_SEL_0 },
{ GEN7_DBGC_EVT_INTF_SEL_1 },
{ GEN7_DBGC_PERF_ATB_CFG },
{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_0 },
{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_1 },
{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_2 },
{ GEN7_DBGC_PERF_ATB_COUNTER_SEL_3 },
{ GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
{ GEN7_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
{ GEN7_DBGC_PERF_ATB_DRAIN_CMD },
{ GEN7_DBGC_ECO_CNTL },
{ GEN7_DBGC_AHB_DBG_CNTL },
};
static struct adreno_coresight_register gen7_coresight_regs_cx[] = {
{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_A },
{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_B },
{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_C },
{ GEN7_CX_DBGC_CFG_DBGBUS_SEL_D },
{ GEN7_CX_DBGC_CFG_DBGBUS_CNTLT },
{ GEN7_CX_DBGC_CFG_DBGBUS_CNTLM },
{ GEN7_CX_DBGC_CFG_DBGBUS_OPL },
{ GEN7_CX_DBGC_CFG_DBGBUS_OPE },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_0 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_2 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTL_3 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_0 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_2 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKL_3 },
{ GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_0 },
{ GEN7_CX_DBGC_CFG_DBGBUS_BYTEL_1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_0 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_2 },
{ GEN7_CX_DBGC_CFG_DBGBUS_IVTE_3 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_0 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_2 },
{ GEN7_CX_DBGC_CFG_DBGBUS_MASKE_3 },
{ GEN7_CX_DBGC_CFG_DBGBUS_NIBBLEE },
{ GEN7_CX_DBGC_CFG_DBGBUS_PTRC0 },
{ GEN7_CX_DBGC_CFG_DBGBUS_PTRC1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_LOADREG },
{ GEN7_CX_DBGC_CFG_DBGBUS_IDX },
{ GEN7_CX_DBGC_CFG_DBGBUS_CLRC },
{ GEN7_CX_DBGC_CFG_DBGBUS_LOADIVT },
{ GEN7_CX_DBGC_VBIF_DBG_CNTL },
{ GEN7_CX_DBGC_DBG_LO_HI_GPIO },
{ GEN7_CX_DBGC_EXT_TRACE_BUS_CNTL },
{ GEN7_CX_DBGC_READ_AHB_THROUGH_DBG },
{ GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF1 },
{ GEN7_CX_DBGC_CFG_DBGBUS_TRACE_BUF2 },
{ GEN7_CX_DBGC_EVT_CFG },
{ GEN7_CX_DBGC_EVT_INTF_SEL_0 },
{ GEN7_CX_DBGC_EVT_INTF_SEL_1 },
{ GEN7_CX_DBGC_PERF_ATB_CFG },
{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_0 },
{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_1 },
{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_2 },
{ GEN7_CX_DBGC_PERF_ATB_COUNTER_SEL_3 },
{ GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_0 },
{ GEN7_CX_DBGC_PERF_ATB_TRIG_INTF_SEL_1 },
{ GEN7_CX_DBGC_PERF_ATB_DRAIN_CMD },
{ GEN7_CX_DBGC_ECO_CNTL },
{ GEN7_CX_DBGC_AHB_DBG_CNTL },
};
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &gen7_coresight_regs[0]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &gen7_coresight_regs[1]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &gen7_coresight_regs[2]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &gen7_coresight_regs[3]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &gen7_coresight_regs[4]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &gen7_coresight_regs[5]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &gen7_coresight_regs[6]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &gen7_coresight_regs[7]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &gen7_coresight_regs[8]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &gen7_coresight_regs[9]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &gen7_coresight_regs[10]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &gen7_coresight_regs[11]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &gen7_coresight_regs[12]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &gen7_coresight_regs[13]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &gen7_coresight_regs[14]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &gen7_coresight_regs[15]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &gen7_coresight_regs[16]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &gen7_coresight_regs[17]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &gen7_coresight_regs[18]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &gen7_coresight_regs[19]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &gen7_coresight_regs[20]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &gen7_coresight_regs[21]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &gen7_coresight_regs[22]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &gen7_coresight_regs[23]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &gen7_coresight_regs[24]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &gen7_coresight_regs[25]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &gen7_coresight_regs[26]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &gen7_coresight_regs[27]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &gen7_coresight_regs[28]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &gen7_coresight_regs[29]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &gen7_coresight_regs[30]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &gen7_coresight_regs[31]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &gen7_coresight_regs[32]);
static ADRENO_CORESIGHT_ATTR(vbif_dbg_cntl, &gen7_coresight_regs[33]);
static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &gen7_coresight_regs[34]);
static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &gen7_coresight_regs[35]);
static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg, &gen7_coresight_regs[36]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1, &gen7_coresight_regs[37]);
static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2, &gen7_coresight_regs[38]);
static ADRENO_CORESIGHT_ATTR(evt_cfg, &gen7_coresight_regs[39]);
static ADRENO_CORESIGHT_ATTR(evt_intf_sel_0, &gen7_coresight_regs[40]);
static ADRENO_CORESIGHT_ATTR(evt_intf_sel_1, &gen7_coresight_regs[41]);
static ADRENO_CORESIGHT_ATTR(perf_atb_cfg, &gen7_coresight_regs[42]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_0, &gen7_coresight_regs[43]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_1, &gen7_coresight_regs[44]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_2, &gen7_coresight_regs[45]);
static ADRENO_CORESIGHT_ATTR(perf_atb_counter_sel_3, &gen7_coresight_regs[46]);
static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_0,
&gen7_coresight_regs[47]);
static ADRENO_CORESIGHT_ATTR(perf_atb_trig_intf_sel_1,
&gen7_coresight_regs[48]);
static ADRENO_CORESIGHT_ATTR(perf_atb_drain_cmd, &gen7_coresight_regs[49]);
static ADRENO_CORESIGHT_ATTR(eco_cntl, &gen7_coresight_regs[50]);
static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &gen7_coresight_regs[51]);
/* CX debug registers */
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_a,
&gen7_coresight_regs_cx[0]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_b,
&gen7_coresight_regs_cx[1]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_c,
&gen7_coresight_regs_cx[2]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_sel_d,
&gen7_coresight_regs_cx[3]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlt,
&gen7_coresight_regs_cx[4]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_cntlm,
&gen7_coresight_regs_cx[5]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_opl,
&gen7_coresight_regs_cx[6]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ope,
&gen7_coresight_regs_cx[7]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_0,
&gen7_coresight_regs_cx[8]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_1,
&gen7_coresight_regs_cx[9]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_2,
&gen7_coresight_regs_cx[10]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivtl_3,
&gen7_coresight_regs_cx[11]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_0,
&gen7_coresight_regs_cx[12]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_1,
&gen7_coresight_regs_cx[13]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_2,
&gen7_coresight_regs_cx[14]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maskl_3,
&gen7_coresight_regs_cx[15]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_0,
&gen7_coresight_regs_cx[16]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_bytel_1,
&gen7_coresight_regs_cx[17]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_0,
&gen7_coresight_regs_cx[18]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_1,
&gen7_coresight_regs_cx[19]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_2,
&gen7_coresight_regs_cx[20]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ivte_3,
&gen7_coresight_regs_cx[21]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_0,
&gen7_coresight_regs_cx[22]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_1,
&gen7_coresight_regs_cx[23]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_2,
&gen7_coresight_regs_cx[24]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_maske_3,
&gen7_coresight_regs_cx[25]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_nibblee,
&gen7_coresight_regs_cx[26]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc0,
&gen7_coresight_regs_cx[27]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_ptrc1,
&gen7_coresight_regs_cx[28]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadreg,
&gen7_coresight_regs_cx[29]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_idx,
&gen7_coresight_regs_cx[30]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_clrc,
&gen7_coresight_regs_cx[31]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_loadivt,
&gen7_coresight_regs_cx[32]);
static ADRENO_CORESIGHT_ATTR(cx_vbif_dbg_cntl,
&gen7_coresight_regs_cx[33]);
static ADRENO_CORESIGHT_ATTR(cx_dbg_lo_hi_gpio,
&gen7_coresight_regs_cx[34]);
static ADRENO_CORESIGHT_ATTR(cx_ext_trace_bus_cntl,
&gen7_coresight_regs_cx[35]);
static ADRENO_CORESIGHT_ATTR(cx_read_ahb_through_dbg,
&gen7_coresight_regs_cx[36]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf1,
&gen7_coresight_regs_cx[37]);
static ADRENO_CORESIGHT_ATTR(cx_cfg_dbgbus_trace_buf2,
&gen7_coresight_regs_cx[38]);
static ADRENO_CORESIGHT_ATTR(cx_evt_cfg,
&gen7_coresight_regs_cx[39]);
static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_0,
&gen7_coresight_regs_cx[40]);
static ADRENO_CORESIGHT_ATTR(cx_evt_intf_sel_1,
&gen7_coresight_regs_cx[41]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_cfg,
&gen7_coresight_regs_cx[42]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_0,
&gen7_coresight_regs_cx[43]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_1,
&gen7_coresight_regs_cx[44]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_2,
&gen7_coresight_regs_cx[45]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_counter_sel_3,
&gen7_coresight_regs_cx[46]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_0,
&gen7_coresight_regs_cx[47]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_trig_intf_sel_1,
&gen7_coresight_regs_cx[48]);
static ADRENO_CORESIGHT_ATTR(cx_perf_atb_drain_cmd,
&gen7_coresight_regs_cx[49]);
static ADRENO_CORESIGHT_ATTR(cx_eco_cntl,
&gen7_coresight_regs_cx[50]);
static ADRENO_CORESIGHT_ATTR(cx_ahb_dbg_cntl,
&gen7_coresight_regs_cx[51]);
static struct attribute *gen7_coresight_attrs[] = {
&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
&coresight_attr_cfg_dbgbus_opl.attr.attr,
&coresight_attr_cfg_dbgbus_ope.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
&coresight_attr_cfg_dbgbus_idx.attr.attr,
&coresight_attr_cfg_dbgbus_clrc.attr.attr,
&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
&coresight_attr_vbif_dbg_cntl.attr.attr,
&coresight_attr_dbg_lo_hi_gpio.attr.attr,
&coresight_attr_ext_trace_bus_cntl.attr.attr,
&coresight_attr_read_ahb_through_dbg.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
&coresight_attr_evt_cfg.attr.attr,
&coresight_attr_evt_intf_sel_0.attr.attr,
&coresight_attr_evt_intf_sel_1.attr.attr,
&coresight_attr_perf_atb_cfg.attr.attr,
&coresight_attr_perf_atb_counter_sel_0.attr.attr,
&coresight_attr_perf_atb_counter_sel_1.attr.attr,
&coresight_attr_perf_atb_counter_sel_2.attr.attr,
&coresight_attr_perf_atb_counter_sel_3.attr.attr,
&coresight_attr_perf_atb_trig_intf_sel_0.attr.attr,
&coresight_attr_perf_atb_trig_intf_sel_1.attr.attr,
&coresight_attr_perf_atb_drain_cmd.attr.attr,
&coresight_attr_eco_cntl.attr.attr,
&coresight_attr_ahb_dbg_cntl.attr.attr,
NULL,
};
/* CX */
static struct attribute *gen7_coresight_attrs_cx[] = {
&coresight_attr_cx_cfg_dbgbus_sel_a.attr.attr,
&coresight_attr_cx_cfg_dbgbus_sel_b.attr.attr,
&coresight_attr_cx_cfg_dbgbus_sel_c.attr.attr,
&coresight_attr_cx_cfg_dbgbus_sel_d.attr.attr,
&coresight_attr_cx_cfg_dbgbus_cntlt.attr.attr,
&coresight_attr_cx_cfg_dbgbus_cntlm.attr.attr,
&coresight_attr_cx_cfg_dbgbus_opl.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ope.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivtl_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maskl_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_bytel_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_bytel_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ivte_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_2.attr.attr,
&coresight_attr_cx_cfg_dbgbus_maske_3.attr.attr,
&coresight_attr_cx_cfg_dbgbus_nibblee.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ptrc0.attr.attr,
&coresight_attr_cx_cfg_dbgbus_ptrc1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_loadreg.attr.attr,
&coresight_attr_cx_cfg_dbgbus_idx.attr.attr,
&coresight_attr_cx_cfg_dbgbus_clrc.attr.attr,
&coresight_attr_cx_cfg_dbgbus_loadivt.attr.attr,
&coresight_attr_cx_vbif_dbg_cntl.attr.attr,
&coresight_attr_cx_dbg_lo_hi_gpio.attr.attr,
&coresight_attr_cx_ext_trace_bus_cntl.attr.attr,
&coresight_attr_cx_read_ahb_through_dbg.attr.attr,
&coresight_attr_cx_cfg_dbgbus_trace_buf1.attr.attr,
&coresight_attr_cx_cfg_dbgbus_trace_buf2.attr.attr,
&coresight_attr_cx_evt_cfg.attr.attr,
&coresight_attr_cx_evt_intf_sel_0.attr.attr,
&coresight_attr_cx_evt_intf_sel_1.attr.attr,
&coresight_attr_cx_perf_atb_cfg.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_0.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_1.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_2.attr.attr,
&coresight_attr_cx_perf_atb_counter_sel_3.attr.attr,
&coresight_attr_cx_perf_atb_trig_intf_sel_0.attr.attr,
&coresight_attr_cx_perf_atb_trig_intf_sel_1.attr.attr,
&coresight_attr_cx_perf_atb_drain_cmd.attr.attr,
&coresight_attr_cx_eco_cntl.attr.attr,
&coresight_attr_cx_ahb_dbg_cntl.attr.attr,
NULL,
};
static const struct attribute_group gen7_coresight_group = {
.attrs = gen7_coresight_attrs,
};
static const struct attribute_group *gen7_coresight_groups[] = {
&gen7_coresight_group,
NULL,
};
static const struct attribute_group gen7_coresight_group_cx = {
.attrs = gen7_coresight_attrs_cx,
};
static const struct attribute_group *gen7_coresight_groups_cx[] = {
&gen7_coresight_group_cx,
NULL,
};
static const struct adreno_coresight gen7_coresight = {
.registers = gen7_coresight_regs,
.count = ARRAY_SIZE(gen7_coresight_regs),
.groups = gen7_coresight_groups,
};
static const struct adreno_coresight gen7_coresight_cx = {
.registers = gen7_coresight_regs_cx,
.count = ARRAY_SIZE(gen7_coresight_regs_cx),
.groups = gen7_coresight_groups_cx,
};
static int name_match(struct device *dev, void *data)
{
char *child_name = data;
return strcmp(child_name, dev_name(dev)) == 0;
}
void gen7_coresight_init(struct adreno_device *adreno_dev)
{
struct adreno_funnel_device *funnel_gfx = &adreno_dev->funnel_gfx;
struct device *amba_dev;
/* Find the amba funnel device associated with gfx coresight funnel */
amba_dev = bus_find_device_by_name(&amba_bustype, NULL, "10963000.funnel");
if (!amba_dev)
return;
funnel_gfx->funnel_dev = device_find_child(amba_dev, "coresight-funnel-gfx", name_match);
if (funnel_gfx->funnel_dev == NULL)
return;
funnel_gfx->funnel_csdev = to_coresight_device(funnel_gfx->funnel_dev);
if (funnel_gfx->funnel_csdev == NULL)
return;
/*
* Since coresight_funnel_gfx component is in graphics block, GPU has to be powered up
* before enabling the funnel. Currently the generic coresight driver doesn't handle that.
* Override the funnel ops set by coresight driver with graphics funnel ops, so that the
* GPU can be brought up before enabling the funnel.
*/
funnel_gfx->funnel_ops = funnel_gfx->funnel_csdev->ops;
funnel_gfx->funnel_csdev->ops = NULL;
/*
* The read-only sysfs node (funnel_ctrl) associated with gfx funnel reads the control
* register and could cause a NOC error when gpu is in slumber. Since we do not require
* such a node, remove the attribute groups for the funnel.
*/
sysfs_remove_groups(&funnel_gfx->funnel_dev->kobj, funnel_gfx->funnel_csdev->dev.groups);
adreno_coresight_add_device(adreno_dev, "qcom,gpu-coresight-gx",
&gen7_coresight, &adreno_dev->gx_coresight);
adreno_coresight_add_device(adreno_dev, "qcom,gpu-coresight-cx",
&gen7_coresight_cx, &adreno_dev->cx_coresight);
}

File diff suppressed because it is too large


@ -0,0 +1,510 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN7_GMU_H
#define __ADRENO_GEN7_GMU_H
#include <linux/mailbox_client.h>
#include "adreno_gen7_hfi.h"
#include "kgsl_gmu_core.h"
struct gen7_dcvs_table {
u32 gpu_level_num;
u32 gmu_level_num;
struct opp_gx_desc gx_votes[MAX_GX_LEVELS];
struct opp_desc cx_votes[MAX_CX_LEVELS];
};
/**
* struct gen7_gmu_device - GMU device structure
* @ver: GMU Version information
* @irq: GMU interrupt number
* @fw_image: GMU FW image
* @hfi_mem: pointer to HFI shared memory
* @dump_mem: pointer to GMU debug dump memory
* @gmu_log: gmu event log memory
* @hfi: HFI controller
* @num_gpupwrlevels: number of GPU frequencies in the GPU freq table
* @num_bwlevel: number of GPU BW levels
* @num_cnocbwlevel: number of CNOC BW levels
* @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling
* @clks: GPU subsystem clocks required for GMU functionality
* @wakeup_pwrlevel: GPU wake up power/DCVS level in case different
* than default power level
* @idle_level: Minimal GPU idle power level
* @fault_count: GMU fault count
* @mailbox: Messages to AOP for ACD enable/disable go through this
* @log_wptr_retention: Store the log wptr offset on slumber
*/
struct gen7_gmu_device {
struct {
u32 core;
u32 core_dev;
u32 pwr;
u32 pwr_dev;
u32 hfi;
} ver;
struct platform_device *pdev;
int irq;
const struct firmware *fw_image;
struct kgsl_memdesc *dump_mem;
struct kgsl_memdesc *gmu_log;
/** @gmu_init_scratch: Memory to store the initial HFI messages */
struct kgsl_memdesc *gmu_init_scratch;
/** @gpu_boot_scratch: Memory to store the bootup HFI messages */
struct kgsl_memdesc *gpu_boot_scratch;
/** @vrb: GMU virtual register bank memory */
struct kgsl_memdesc *vrb;
/** @trace: gmu trace container */
struct kgsl_gmu_trace trace;
struct gen7_hfi hfi;
struct clk_bulk_data *clks;
/** @num_clks: Number of entries in the @clks array */
int num_clks;
unsigned int idle_level;
/** @freqs: Array of GMU frequencies */
u32 freqs[GMU_MAX_PWRLEVELS];
/** @vlvls: Array of GMU voltage levels */
u32 vlvls[GMU_MAX_PWRLEVELS];
struct kgsl_mailbox mailbox;
/** @gmu_globals: Array to store gmu global buffers */
struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES];
/** @global_entries: To keep track of number of gmu buffers */
u32 global_entries;
struct gmu_vma_entry *vma;
unsigned int log_wptr_retention;
/** @cm3_fault: whether gmu received a cm3 fault interrupt */
atomic_t cm3_fault;
/**
* @itcm_shadow: Copy of the itcm block in firmware binary used for
* snapshot
*/
void *itcm_shadow;
/** @flags: Internal gmu flags */
unsigned long flags;
/** @rscc_virt: Pointer where RSCC block is mapped */
void __iomem *rscc_virt;
/** @domain: IOMMU domain for the kernel context */
struct iommu_domain *domain;
/** @log_stream_enable: GMU log streaming enable. Disabled by default */
bool log_stream_enable;
/** @log_group_mask: Allows overriding default GMU log group mask */
u32 log_group_mask;
struct kobject log_kobj;
/*
* @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at
* which GMU can run at higher frequency.
*/
u32 perf_ddr_bw;
/** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */
void __iomem *rdpm_cx_virt;
/** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */
void __iomem *rdpm_mx_virt;
/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
u32 num_oob_perfcntr;
/** @acd_debug_val: DVM value to calibrate ACD for a level */
u32 acd_debug_val;
/** @stats_enable: GMU stats feature enable */
bool stats_enable;
/** @stats_mask: GMU performance countables to enable */
u32 stats_mask;
/** @stats_interval: GMU performance counters sampling interval */
u32 stats_interval;
/** @stats_kobj: kernel object for GMU stats directory in sysfs */
struct kobject stats_kobj;
/** @cp_init_hdr: raw command header for cp_init */
u32 cp_init_hdr;
/** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */
u32 switch_to_unsec_hdr;
/** @dcvs_table: Table for gpu dcvs levels */
struct gen7_dcvs_table dcvs_table;
};
/* Helper function to get to gen7 gmu device from adreno device */
struct gen7_gmu_device *to_gen7_gmu(struct adreno_device *adreno_dev);
/* Helper function to get to adreno device from gen7 gmu device */
struct adreno_device *gen7_gmu_to_adreno(struct gen7_gmu_device *gmu);
/**
* gen7_reserve_gmu_kernel_block() - Allocate a global gmu buffer
* @gmu: Pointer to the gen7 gmu device
* @addr: Desired gmu virtual address
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @align: Alignment for the GMU VA and GMU mapping size
*
* This function allocates a global gmu buffer and maps it in
* the desired gmu vma
*
* Return: Pointer to the memory descriptor or error pointer on failure
*/
struct kgsl_memdesc *gen7_reserve_gmu_kernel_block(struct gen7_gmu_device *gmu,
u32 addr, u32 size, u32 vma_id, u32 align);
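/*
 * Illustrative usage sketch (not part of the driver source): the HFI code
 * later in this series allocates its shared queue memory this way, passing
 * addr as 0, presumably letting the allocator choose the GMU VA:
 *
 *   struct kgsl_memdesc *md = gen7_reserve_gmu_kernel_block(gmu, 0,
 *           HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
 *   if (IS_ERR(md))
 *           return PTR_ERR(md);
 */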
/**
* gen7_reserve_gmu_kernel_block_fixed() - Map a physical resource address to gmu
* @gmu: Pointer to the gen7 gmu device
* @addr: Desired gmu virtual address
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @resource: Name of the resource to get the size and address to allocate
* @attrs: Attributes for the mapping
* @align: Alignment for the GMU VA and GMU mapping size
*
* This function maps the physical resource address to the desired gmu vma
*
* Return: Pointer to the memory descriptor or error pointer on failure
*/
struct kgsl_memdesc *gen7_reserve_gmu_kernel_block_fixed(struct gen7_gmu_device *gmu,
u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align);
/**
* gen7_alloc_gmu_kernel_block() - Allocate a gmu buffer
* @gmu: Pointer to the gen7 gmu device
* @md: Pointer to the memdesc
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @attrs: Attributes for the mapping
*
* This function allocates a buffer and maps it in the desired gmu vma
*
* Return: 0 on success or error code on failure
*/
int gen7_alloc_gmu_kernel_block(struct gen7_gmu_device *gmu,
struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs);
/**
* gen7_gmu_import_buffer() - Import a gmu buffer
* @gmu: Pointer to the gen7 gmu device
* @vma_id: Target gmu vma where this buffer should be mapped
* @md: Pointer to the memdesc to be mapped
* @attrs: Attributes for the mapping
* @align: Alignment for the GMU VA and GMU mapping size
*
* This function imports and maps a buffer to a gmu vma
*
* Return: 0 on success or error code on failure
*/
int gen7_gmu_import_buffer(struct gen7_gmu_device *gmu, u32 vma_id,
struct kgsl_memdesc *md, u32 attrs, u32 align);
/**
* gen7_free_gmu_block() - Free a gmu buffer
* @gmu: Pointer to the gen7 gmu device
* @md: Pointer to the memdesc that is to be freed
*
* This function frees a gmu block allocated by gen7_reserve_gmu_kernel_block()
*/
void gen7_free_gmu_block(struct gen7_gmu_device *gmu, struct kgsl_memdesc *md);
/**
* gen7_build_rpmh_tables - Build the rpmh tables
* @adreno_dev: Pointer to the adreno device
*
* This function creates the gpu dcvs and bw tables
*
* Return: 0 on success and negative error on failure
*/
int gen7_build_rpmh_tables(struct adreno_device *adreno_dev);
/**
* gen7_gmu_gx_is_on - Check if GX is on
* @adreno_dev: Pointer to the adreno device
*
* This function reads pwr status registers to check if GX
* is on or off
*/
bool gen7_gmu_gx_is_on(struct adreno_device *adreno_dev);
/**
* gen7_gmu_device_probe - GEN7 GMU device probe function
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target-specific probe function for GMU-based gen7 targets.
*/
int gen7_gmu_device_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* gen7_gmu_reset - Reset and restart the gmu
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_reset(struct adreno_device *adreno_dev);
/**
* gen7_enable_gpu_irq - Enable gpu interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen7_enable_gpu_irq(struct adreno_device *adreno_dev);
/**
* gen7_disable_gpu_irq - Disable gpu interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen7_disable_gpu_irq(struct adreno_device *adreno_dev);
/**
* gen7_gmu_snapshot- Take snapshot for gmu targets
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot structure
*
* Send an NMI to gmu if we hit a gmu fault. Then take gmu
* snapshot and carry on with rest of the gen7 snapshot
*/
void gen7_gmu_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* gen7_gmu_probe - Probe gen7 gmu resources
* @device: Pointer to the kgsl device
* @pdev: Pointer to the gmu platform device
*
* Probe the gmu and hfi resources
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_probe(struct kgsl_device *device,
struct platform_device *pdev);
/**
* gen7_gmu_parse_fw - Parse the gmu fw binary
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_parse_fw(struct adreno_device *adreno_dev);
/**
* gen7_gmu_memory_init - Allocate gmu memory
* @adreno_dev: Pointer to the adreno device
*
* Allocates the gmu log buffer and others if needed.
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_memory_init(struct adreno_device *adreno_dev);
/**
* gen7_gmu_aop_send_acd_state - Enable or disable acd feature in aop
* @gmu: Pointer to the gen7 gmu device
* @flag: Boolean to enable or disable acd in aop
*
* This function enables or disables gpu acd feature using mailbox
*/
void gen7_gmu_aop_send_acd_state(struct gen7_gmu_device *gmu, bool flag);
/**
* gen7_gmu_load_fw - Load gmu firmware
* @adreno_dev: Pointer to the adreno device
*
* Loads the gmu firmware binary into TCMs and memory
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_load_fw(struct adreno_device *adreno_dev);
/**
* gen7_gmu_device_start - Bring gmu out of reset
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_device_start(struct adreno_device *adreno_dev);
/**
* gen7_gmu_hfi_start - Indicate hfi start to gmu
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_hfi_start(struct adreno_device *adreno_dev);
/**
* gen7_gmu_itcm_shadow - Create itcm shadow copy for snapshot
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_itcm_shadow(struct adreno_device *adreno_dev);
/**
* gen7_gmu_register_config - gmu register configuration
* @adreno_dev: Pointer to the adreno device
*
* Program gmu registers based on features
*/
void gen7_gmu_register_config(struct adreno_device *adreno_dev);
/**
* gen7_gmu_version_info - Get gmu firmware version
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_version_info(struct adreno_device *adreno_dev);
/**
* gen7_gmu_irq_enable - Enable gmu interrupts
* @adreno_dev: Pointer to the adreno device
*/
void gen7_gmu_irq_enable(struct adreno_device *adreno_dev);
/**
* gen7_gmu_irq_disable - Disable gmu interrupts
* @adreno_dev: Pointer to the adreno device
*/
void gen7_gmu_irq_disable(struct adreno_device *adreno_dev);
/**
* gen7_gmu_suspend - Hard reset the gpu and gmu
* @adreno_dev: Pointer to the adreno device
*
* In case we hit a gmu fault, hard reset the gpu and gmu
* to recover from the fault
*/
void gen7_gmu_suspend(struct adreno_device *adreno_dev);
/**
* gen7_gmu_oob_set - send gmu oob request
* @device: Pointer to the kgsl device
* @oob: Type of oob request as defined in enum oob_request
*
* Request gmu to keep gpu powered up till the oob is cleared
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);
/**
* gen7_gmu_oob_clear - clear an asserted oob request
* @device: Pointer to the kgsl device
* @oob: Type of oob request as defined in enum oob_request
*
* Clear a previously requested oob so that gmu can power
* collapse the gpu
*/
void gen7_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);
/**
* gen7_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
* @adreno_dev: Pointer to the adreno device
*
* If ifpc is enabled, wait for gmu to put gpu into ifpc.
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);
/**
* gen7_gmu_wait_for_idle - Wait for gmu to become idle
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_wait_for_idle(struct adreno_device *adreno_dev);
/**
* gen7_rscc_sleep_sequence - Trigger rscc sleep sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_rscc_sleep_sequence(struct adreno_device *adreno_dev);
/**
* gen7_rscc_wakeup_sequence - Trigger rscc wakeup sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_rscc_wakeup_sequence(struct adreno_device *adreno_dev);
/**
* gen7_halt_gbif - Halt CX and GX requests in GBIF
* @adreno_dev: Pointer to the adreno device
*
* Clear any pending GX or CX transactions in GBIF and
* deassert GBIF halt
*
* Return: 0 on success or negative error on failure
*/
int gen7_halt_gbif(struct adreno_device *adreno_dev);
/**
* gen7_load_pdc_ucode - Load and enable pdc sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_load_pdc_ucode(struct adreno_device *adreno_dev);
/**
* gen7_load_rsc_ucode - Load rscc sequence
* @adreno_dev: Pointer to the adreno device
*/
void gen7_load_rsc_ucode(struct adreno_device *adreno_dev);
/**
* gen7_gmu_remove - Clean up gmu probed resources
* @device: Pointer to the kgsl device
*/
void gen7_gmu_remove(struct kgsl_device *device);
/**
* gen7_gmu_enable_clks - Enable gmu clocks
* @adreno_dev: Pointer to the adreno device
* @level: GMU frequency level
*
* Return: 0 on success or negative error on failure
*/
int gen7_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);
/**
* gen7_gmu_handle_watchdog - Handle watchdog interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen7_gmu_handle_watchdog(struct adreno_device *adreno_dev);
/**
* gen7_gmu_send_nmi - Send NMI to GMU
* @device: Pointer to the kgsl device
* @force: Boolean to forcefully send NMI irrespective of GMU state
*/
void gen7_gmu_send_nmi(struct kgsl_device *device, bool force);
/**
* gen7_gmu_add_to_minidump - Register gen7_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int gen7_gmu_add_to_minidump(struct adreno_device *adreno_dev);
/**
* gen7_snapshot_gmu_mem - Snapshot a GMU memory descriptor
* @device: Pointer to the kgsl device
* @buf: Destination snapshot buffer
* @remain: Remaining size of the snapshot buffer
* @priv: Opaque handle
*
* Return: Number of bytes written to snapshot buffer
*/
size_t gen7_snapshot_gmu_mem(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv);
/**
* gen7_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU
* @adreno_dev: Handle to the adreno device
* @ab: ab request that needs to be scaled in MBps
*
* Returns the AB value that needs to be prefixed to bandwidth vote in kbps
*/
u32 gen7_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab);
#endif


@ -0,0 +1,317 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "gen7_reg.h"
#include "adreno.h"
#include "adreno_gen7.h"
#include "adreno_gen7_gmu.h"
#include "adreno_snapshot.h"
#include "adreno_gen7_0_0_snapshot.h"
#include "adreno_gen7_2_0_snapshot.h"
#include "kgsl_device.h"
size_t gen7_snapshot_gmu_mem(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
unsigned int *data = (unsigned int *)
(buf + sizeof(*mem_hdr));
struct gmu_mem_type_desc *desc = priv;
if (priv == NULL || desc->memdesc->hostptr == NULL)
return 0;
if (remain < desc->memdesc->size + sizeof(*mem_hdr)) {
dev_err(device->dev,
"snapshot: Not enough memory for the gmu section %d\n",
desc->type);
return 0;
}
mem_hdr->type = desc->type;
mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr;
mem_hdr->gmuaddr = desc->memdesc->gmuaddr;
mem_hdr->gpuaddr = 0;
/* The hw fence queues are mapped as iomem in the kernel */
if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE)
memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size);
else
memcpy(data, desc->memdesc->hostptr, desc->memdesc->size);
return desc->memdesc->size + sizeof(*mem_hdr);
}
static size_t gen7_gmu_snapshot_dtcm(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
u32 i;
if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) {
SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory");
return 0;
}
mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
mem_hdr->hostaddr = 0;
mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start;
mem_hdr->gpuaddr = 0;
/*
* Read of GMU TCMs over side-band debug controller interface is
* supported on gen7_2_x family
*/
if (adreno_is_gen7_2_x_family(adreno_dev)) {
/*
* region [20]: Dump ITCM/DTCM. Select 1 for DTCM.
* autoInc [31]: Autoincrement the address field after each
* access to TCM_DBG_DATA
*/
kgsl_regwrite(device, GEN7_CX_DBGC_TCM_DBG_ADDR, BIT(20) | BIT(31));
for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
kgsl_regread(device, GEN7_CX_DBGC_TCM_DBG_DATA, data++);
} else {
for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
gmu_core_regread(device, GEN7_GMU_CM3_DTCM_START + i, data++);
}
return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr);
}
static size_t gen7_gmu_snapshot_itcm(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
void *dest = buf + sizeof(*mem_hdr);
struct gen7_gmu_device *gmu = (struct gen7_gmu_device *)priv;
if (!gmu->itcm_shadow) {
dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n");
return 0;
}
if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) {
SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory");
return 0;
}
mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
mem_hdr->hostaddr = 0;
mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start;
mem_hdr->gpuaddr = 0;
memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size);
return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr);
}
static void gen7_gmu_snapshot_memories(struct kgsl_device *device,
struct gen7_gmu_device *gmu, struct kgsl_snapshot *snapshot)
{
struct gmu_mem_type_desc desc;
struct kgsl_memdesc *md;
int i;
for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) {
md = &gmu->gmu_globals[i];
if (!md->size)
continue;
desc.memdesc = md;
if (md == gmu->hfi.hfi_mem)
desc.type = SNAPSHOT_GMU_MEM_HFI;
else if (md == gmu->gmu_log)
desc.type = SNAPSHOT_GMU_MEM_LOG;
else if (md == gmu->dump_mem)
desc.type = SNAPSHOT_GMU_MEM_DEBUG;
else if ((md == gmu->gmu_init_scratch) || (md == gmu->gpu_boot_scratch))
desc.type = SNAPSHOT_GMU_MEM_WARMBOOT;
else if (md == gmu->vrb)
desc.type = SNAPSHOT_GMU_MEM_VRB;
else if (md == gmu->trace.md)
desc.type = SNAPSHOT_GMU_MEM_TRACE;
else
desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, gen7_snapshot_gmu_mem, &desc);
}
}
struct kgsl_snapshot_gmu_version {
u32 type;
u32 value;
};
static size_t gen7_snapshot_gmu_version(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
u32 *data = (u32 *) (buf + sizeof(*header));
struct kgsl_snapshot_gmu_version *ver = priv;
if (remain < DEBUG_SECTION_SZ(1)) {
SNAPSHOT_ERR_NOMEM(device, "GMU Version");
return 0;
}
header->type = ver->type;
header->size = 1;
*data = ver->value;
return DEBUG_SECTION_SZ(1);
}
static void gen7_gmu_snapshot_versions(struct kgsl_device *device,
struct gen7_gmu_device *gmu,
struct kgsl_snapshot *snapshot)
{
int i;
struct kgsl_snapshot_gmu_version gmu_vers[] = {
{ .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION,
.value = gmu->ver.core, },
{ .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION,
.value = gmu->ver.core_dev, },
{ .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION,
.value = gmu->ver.pwr, },
{ .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION,
.value = gmu->ver.pwr_dev, },
{ .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION,
.value = gmu->ver.hfi, },
};
for (i = 0; i < ARRAY_SIZE(gmu_vers); i++)
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, gen7_snapshot_gmu_version,
&gmu_vers[i]);
}
#define RSCC_OFFSET_DWORDS 0x14000
static size_t gen7_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
const u32 *regs = priv;
unsigned int *data = (unsigned int *)buf;
int count = 0, k;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
/* Figure out how many registers we are going to dump */
count = adreno_snapshot_regs_count(regs);
if (remain < (count * 4)) {
SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS");
return 0;
}
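/*
 * Emit registers in the REGS_V2 pair encoding used by this section: a
 * single register is written as (BIT(31) | offset, value), while a range
 * is written as (start offset, count) followed by the values. Offsets are
 * rebased against RSCC_OFFSET_DWORDS because rscc_virt maps only the RSCC
 * block.
 */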
for (regs = priv; regs[0] != UINT_MAX; regs += 2) {
unsigned int cnt = REG_COUNT(regs);
if (cnt == 1) {
*data++ = BIT(31) | regs[0];
*data++ = __raw_readl(gmu->rscc_virt +
((regs[0] - RSCC_OFFSET_DWORDS) << 2));
continue;
}
*data++ = regs[0];
*data++ = cnt;
for (k = regs[0]; k <= regs[1]; k++)
*data++ = __raw_readl(gmu->rscc_virt +
((k - RSCC_OFFSET_DWORDS) << 2));
}
/* Return the size of the section */
return (count * 4);
}
/*
* gen7_gmu_device_snapshot() - GEN7 GMU snapshot function
* @device: Device being snapshotted
* @snapshot: Pointer to the snapshot instance
*
* This is where all of the GEN7 GMU specific bits and pieces are grabbed
* into the snapshot memory
*/
static void gen7_gmu_device_snapshot(struct kgsl_device *device,
struct kgsl_snapshot *snapshot)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
const struct adreno_gen7_core *gpucore = to_gen7_core(ADRENO_DEVICE(device));
const struct gen7_snapshot_block_list *gen7_snapshot_block_list =
gpucore->gen7_snapshot_block_list;
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, gen7_gmu_snapshot_itcm, gmu);
gen7_gmu_snapshot_versions(device, gmu, snapshot);
gen7_gmu_snapshot_memories(device, gmu, snapshot);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
adreno_snapshot_registers_v2, (void *) gen7_snapshot_block_list->gmu_regs);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
gen7_snapshot_rscc_registers, (void *) gen7_snapshot_block_list->rscc_regs);
if (!gen7_gmu_gx_is_on(adreno_dev))
goto dtcm;
/* Set fence to ALLOW mode so registers can be read */
kgsl_regwrite(device, GEN7_GMU_AO_AHB_FENCE_CTRL, 0);
/* Make sure the previous write posted before reading */
wmb();
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
adreno_snapshot_registers_v2, (void *) gen7_snapshot_block_list->gmu_gx_regs);
/*
* A stalled SMMU can lead to NoC timeouts when host accesses DTCM.
* DTCM can be read through side-band DBGC interface on gen7_2_x family.
*/
if (adreno_smmu_is_stalled(adreno_dev) && !adreno_is_gen7_2_x_family(adreno_dev)) {
dev_err(&gmu->pdev->dev,
"Not dumping dtcm because SMMU is stalled\n");
return;
}
dtcm:
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, gen7_gmu_snapshot_dtcm, gmu);
}
void gen7_gmu_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/*
* Dump external registers first to have GPUCC and other external
* registers in the snapshot to analyze the system state even in a partial
* snapshot dump
*/
gen7_snapshot_external_core_regs(device, snapshot);
gen7_gmu_device_snapshot(device, snapshot);
gen7_snapshot(adreno_dev, snapshot);
gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, UINT_MAX);
gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_MASK, HFI_IRQ_MASK);
}


@ -0,0 +1,870 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/delay.h>
#include <linux/nvmem-consumer.h>
#include "adreno.h"
#include "adreno_gen7.h"
#include "adreno_gen7_gmu.h"
#include "adreno_gen7_hfi.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
/* Below section is for all structures related to HFI queues */
#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT
/* Total header sizes + queue sizes + 16 for alignment */
#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))
#define HOST_QUEUE_START_ADDR(hfi_mem, i) \
((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i))
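/*
 * Layout of the HFI buffer: one struct hfi_queue_table (table header plus
 * per-queue headers) followed by HFI_QUEUE_MAX fixed-size queues, with 16
 * bytes of slack reserved for alignment.
 */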
struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
return &gmu->hfi;
}
/* Sizes in the functions below are in units of dwords */
int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx,
unsigned int *output, unsigned int max_size)
{
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
struct hfi_queue_table *tbl = mem_addr->hostptr;
struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
u32 *queue;
u32 msg_hdr;
u32 i, read;
u32 size;
int result = 0;
if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
return -EINVAL;
if (hdr->read_index == hdr->write_index)
return -ENODATA;
/* Clear the output data before populating */
memset(output, 0, max_size);
queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
msg_hdr = queue[hdr->read_index];
size = MSG_HDR_GET_SIZE(msg_hdr);
if (size > (max_size >> 2)) {
dev_err(&gmu->pdev->dev,
"HFI message too big: hdr:0x%x rd idx=%d\n",
msg_hdr, hdr->read_index);
result = -EMSGSIZE;
goto done;
}
read = hdr->read_index;
if (read < hdr->queue_size) {
for (i = 0; i < size && i < (max_size >> 2); i++) {
output[i] = queue[read];
read = (read + 1)%hdr->queue_size;
}
result = size;
} else {
/* In case FW messed up */
dev_err(&gmu->pdev->dev,
"Read index %d greater than queue size %d\n",
hdr->read_index, hdr->queue_size);
result = -ENODATA;
}
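/*
 * Advance the read index to the next 4-dword boundary so the 0xfafafafa
 * padding added by gen7_hfi_queue_write() is skipped.
 */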
read = ALIGN(read, SZ_4) % hdr->queue_size;
hfi_update_read_idx(hdr, read);
/* For acks, trace the packet for which this ack was sent */
if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
MSG_HDR_GET_SIZE(output[1]),
MSG_HDR_GET_SEQNUM(output[1]));
else
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));
done:
return result;
}
int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
u32 *msg, u32 size_bytes)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
u32 *queue;
u32 i, write_idx, read_idx, empty_space;
u32 size_dwords = size_bytes >> 2;
u32 align_size = ALIGN(size_dwords, SZ_4);
u32 id = MSG_HDR_GET_ID(*msg);
if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
return -EINVAL;
queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
write_idx = hdr->write_index;
read_idx = hdr->read_index;
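/*
 * Free space in the ring buffer: when the write index is at or ahead of
 * the read index the free region wraps around the end of the queue,
 * otherwise it is the gap between the two indices. The <= check below
 * keeps at least one dword unused so a full queue is never mistaken for
 * an empty one.
 */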
empty_space = (write_idx >= read_idx) ?
(hdr->queue_size - (write_idx - read_idx))
: (read_idx - write_idx);
if (empty_space <= align_size)
return -ENOSPC;
for (i = 0; i < size_dwords; i++) {
queue[write_idx] = msg[i];
write_idx = (write_idx + 1) % hdr->queue_size;
}
/* Cookify any unused space at the end of the write buffer */
for (; i < align_size; i++) {
queue[write_idx] = 0xfafafafa;
write_idx = (write_idx + 1) % hdr->queue_size;
}
trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
hfi_update_write_idx(&hdr->write_index, write_idx);
return 0;
}
int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct gen7_hfi *hfi = &gmu->hfi;
int ret;
spin_lock(&hfi->cmdq_lock);
if (test_bit(MSG_HDR_GET_ID(msg[0]), hfi->wb_set_record_bitmask))
*msg = RECORD_MSG_HDR(*msg);
ret = gen7_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);
/*
* Some messages like ACD table and perf table are saved in memory, so we need
* to reset the header to make sure we do not send a record enabled bit in case
* we change the warmboot setting from debugfs
*/
*msg = CLEAR_RECORD_MSG_HDR(*msg);
/*
* Memory barrier to make sure packet and write index are written before
* an interrupt is raised
*/
wmb();
/* Send interrupt to GMU to receive the message */
if (!ret)
gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
GEN7_GMU_HOST2GMU_INTR_SET, 0x1);
spin_unlock(&hfi->cmdq_lock);
return ret;
}
/* Sizes of the queue and message are in units of dwords */
static void init_queues(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
int i;
struct hfi_queue_table *tbl;
struct hfi_queue_header *hdr;
struct {
unsigned int idx;
unsigned int pri;
unsigned int status;
} queue[HFI_QUEUE_MAX] = {
{ HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
{ HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
{ HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
};
/* Fill Table Header */
tbl = mem_addr->hostptr;
tbl->qtbl_hdr.version = 0;
tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;
memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));
/* Fill Individual Queue Headers */
for (i = 0; i < HFI_QUEUE_MAX; i++) {
hdr = &tbl->qhdr[i];
hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
hdr->status = queue[i].status;
hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
}
}
int gen7_hfi_init(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct gen7_hfi *hfi = &gmu->hfi;
/* Allocates & maps memory for HFI */
if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
hfi->hfi_mem = gen7_reserve_gmu_kernel_block(gmu, 0,
HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
if (!IS_ERR(hfi->hfi_mem))
init_queues(adreno_dev);
}
return PTR_ERR_OR_ZERO(hfi->hfi_mem);
}
int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd,
struct pending_cmd *ret_cmd)
{
struct adreno_device *adreno_dev = gen7_gmu_to_adreno(gmu);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 *ack = rcvd;
u32 hdr = ack[0];
u32 req_hdr = ack[1];
if (ret_cmd == NULL)
return -EINVAL;
if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) {
memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
return 0;
}
/* Didn't find the sender; log the waiter */
dev_err_ratelimited(&gmu->pdev->dev,
"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
req_hdr, ret_cmd->sent_hdr);
gmu_core_fault_snapshot(device);
return -ENODEV;
}
static int poll_gmu_reg(struct adreno_device *adreno_dev,
u32 offsetdwords, unsigned int expected_val,
unsigned int mask, unsigned int timeout_ms)
{
unsigned int val;
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
bool nmi = false;
while (time_is_after_jiffies(timeout)) {
gmu_core_regread(device, offsetdwords, &val);
if ((val & mask) == expected_val)
return 0;
/*
* If GMU firmware fails any assertion, error message is sent
* to KMD and NMI is triggered. So check if GMU is in NMI and
* timeout early. Bits [11:9] of GEN7_GMU_CM3_FW_INIT_RESULT
* contain GMU reset status. Non zero value here indicates that
* GMU reset is active, NMI handler would eventually complete
* and GMU would wait for recovery.
*/
gmu_core_regread(device, GEN7_GMU_CM3_FW_INIT_RESULT, &val);
if (val & 0xE00) {
nmi = true;
break;
}
usleep_range(10, 100);
}
/* Check one last time */
gmu_core_regread(device, offsetdwords, &val);
if ((val & mask) == expected_val)
return 0;
dev_err(&gmu->pdev->dev,
"Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n",
nmi ? "abort" : "timeout", offsetdwords, expected_val,
val & mask);
return -ETIMEDOUT;
}
static int gen7_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int rc;
u32 *cmd = data;
struct gen7_hfi *hfi = &gmu->hfi;
unsigned int seqnum = atomic_inc_return(&hfi->seqnum);
*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
if (ret_cmd == NULL)
return gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
ret_cmd->sent_hdr = cmd[0];
rc = gen7_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
if (rc)
return rc;
rc = poll_gmu_reg(adreno_dev, GEN7_GMU_GMU2HOST_INTR_INFO,
HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);
if (rc) {
gmu_core_fault_snapshot(device);
dev_err(&gmu->pdev->dev,
"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
return rc;
}
/* Clear the interrupt */
gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR,
HFI_IRQ_MSGQ_MASK);
rc = gen7_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);
return rc;
}
int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
{
struct pending_cmd ret_cmd;
int rc;
memset(&ret_cmd, 0, sizeof(ret_cmd));
rc = gen7_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
if (rc)
return rc;
if (ret_cmd.results[2]) {
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
gmu_core_fault_snapshot(device);
dev_err(&gmu->pdev->dev,
"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
ret_cmd.results[1],
ret_cmd.results[2]);
return -EINVAL;
}
return 0;
}
int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
{
struct hfi_core_fw_start_cmd cmd = {
.handle = 0x0,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
if (ret)
return ret;
return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
static const char *feature_to_string(u32 feature)
{
if (feature == HFI_FEATURE_ACD)
return "ACD";
return "unknown";
}
/* For sending an hfi message inline to handle GMU return type errors */
int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
struct pending_cmd *ret_cmd, u32 size_bytes)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
int rc;
if (GMU_VER_MINOR(gmu->ver.hfi) <= 4)
return gen7_hfi_send_generic_req(adreno_dev, cmd, size_bytes);
rc = gen7_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, ret_cmd);
if (rc)
return rc;
switch (ret_cmd->results[3]) {
case GMU_SUCCESS:
rc = ret_cmd->results[2];
break;
case GMU_ERROR_NO_ENTRY:
/* Unique error to handle undefined HFI msgs by caller */
rc = -ENOENT;
break;
case GMU_ERROR_TIMEOUT:
rc = -EINVAL;
break;
default:
gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
dev_err(&gmu->pdev->dev,
"HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n",
ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]);
rc = -EINVAL;
break;
}
return rc;
}
int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
u32 feature, u32 enable, u32 data)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct pending_cmd ret_cmd = {0};
struct hfi_feature_ctrl_cmd cmd = {
.feature = feature,
.enable = enable,
.data = data,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
if (ret)
return ret;
ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
if (ret < 0)
dev_err(&gmu->pdev->dev,
"Unable to %s feature %s (%d)\n",
enable ? "enable" : "disable",
feature_to_string(feature),
feature);
return ret;
}
int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct pending_cmd ret_cmd = {0};
struct hfi_get_value_cmd cmd = {
.type = type,
.subtype = subtype,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
if (ret)
return ret;
ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
if (ret < 0)
dev_err(&gmu->pdev->dev,
"Unable to get HFI Value type: %d, subtype: %d, error = %d\n",
type, subtype, ret);
return ret;
}
int gen7_hfi_send_set_value(struct adreno_device *adreno_dev,
u32 type, u32 subtype, u32 data)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct pending_cmd ret_cmd = {0};
struct hfi_set_value_cmd cmd = {
.type = type,
.subtype = subtype,
.data = data,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
if (ret)
return ret;
ret = gen7_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
if (ret < 0)
dev_err(&gmu->pdev->dev,
"Unable to set HFI Value %d, %d to %d, error = %d\n",
type, subtype, data, ret);
return ret;
}
void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd)
{
struct hfi_err_cmd *cmd = rcvd;
dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
((cmd->error_code >> 16) & 0xffff),
(cmd->error_code & 0xffff),
(char *) cmd->data);
}
void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd)
{
struct hfi_debug_cmd *cmd = rcvd;
dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
cmd->type, cmd->timestamp, cmd->data);
}
int gen7_hfi_process_queue(struct gen7_gmu_device *gmu,
u32 queue_idx, struct pending_cmd *ret_cmd)
{
u32 rcvd[MAX_RCVD_SIZE];
while (gen7_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
/* ACK Handler */
if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
int ret = gen7_receive_ack_cmd(gmu, rcvd, ret_cmd);
if (ret)
return ret;
continue;
}
/* Request Handler */
switch (MSG_HDR_GET_ID(rcvd[0])) {
case F2H_MSG_ERR: /* No Reply */
adreno_gen7_receive_err_req(gmu, rcvd);
break;
case F2H_MSG_DEBUG: /* No Reply */
adreno_gen7_receive_debug_req(gmu, rcvd);
break;
default: /* No Reply */
dev_err(&gmu->pdev->dev,
"HFI request %d not supported\n",
MSG_HDR_GET_ID(rcvd[0]));
break;
}
}
return 0;
}
int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
{
if (!adreno_dev->bcl_enabled)
return 0;
/*
* BCL data is expected by gmu in below format
* BIT[0] - response type
* BIT[1:7] - Throttle level 1 (optional)
* BIT[8:14] - Throttle level 2 (optional)
* BIT[15:21] - Throttle level 3 (optional)
*/
return gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, adreno_dev->bcl_data);
}
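/*
 * Illustrative sketch only (values are made up, not from the driver): with
 * the bit layout documented above, a platform asking for throttle levels
 * 25/50/75 with response type 1 would end up with a bcl_data of roughly
 *
 *   FIELD_PREP(BIT(0), 1) |
 *   FIELD_PREP(GENMASK(7, 1), 25) |
 *   FIELD_PREP(GENMASK(14, 8), 50) |
 *   FIELD_PREP(GENMASK(21, 15), 75)
 *
 * In practice adreno_dev->bcl_data is supplied elsewhere in the driver; the
 * numbers here are purely for illustration.
 */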
static int gen7_hfi_send_clx_v1_feature_ctrl(struct adreno_device *adreno_dev)
{
int ret;
struct hfi_clx_table_v1_cmd cmd = {0};
/* Make sure the table is valid before enabling feature */
ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL);
if (ret)
return ret;
ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0);
if (ret)
return ret;
/* GMU supports HW CLX V2 only with both HFI V1 and V2 data formats */
cmd.data0 = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1);
cmd.data1 = FIELD_PREP(GENMASK(31, 29), 1) |
FIELD_PREP(GENMASK(28, 28), 1) |
FIELD_PREP(GENMASK(27, 22), 1) |
FIELD_PREP(GENMASK(21, 16), 40) |
FIELD_PREP(GENMASK(15, 0), 0);
cmd.clxt = 0;
cmd.clxh = 0;
cmd.urgmode = 1;
cmd.lkgen = 0;
return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
static int gen7_hfi_send_clx_v2_feature_ctrl(struct adreno_device *adreno_dev)
{
int ret = 0;
struct hfi_clx_table_v2_cmd cmd = {0};
/* Make sure the table is valid before enabling feature */
ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL);
if (ret)
return ret;
ret = gen7_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0);
if (ret)
return ret;
cmd.version = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1);
/* cmd.domain[0] is never used but needed per hfi spec */
cmd.domain[1].data0 = FIELD_PREP(GENMASK(31, 29), 1) |
FIELD_PREP(GENMASK(28, 28), 1) |
FIELD_PREP(GENMASK(27, 22), 1) |
FIELD_PREP(GENMASK(21, 16), 40) |
FIELD_PREP(GENMASK(15, 0), 0);
cmd.domain[1].clxt = 0;
cmd.domain[1].clxh = 0;
cmd.domain[1].urgmode = 1;
cmd.domain[1].lkgen = 0;
cmd.domain[1].currbudget = 50;
return gen7_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
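/*
 * Sketch for reference (arithmetic only): the version word built above is
 *
 *	FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1)
 *		== (0x2 << 16) | 0x1 == 0x00020001
 *
 * i.e. 0x2 in the upper half-word and 0x1 in the lower half-word.
 */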
int gen7_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev)
{
if (!adreno_dev->clx_enabled)
return 0;
/* gen7_11_0 GPU uses HFI CLX data version 1 */
if (adreno_is_gen7_11_0(adreno_dev))
return gen7_hfi_send_clx_v1_feature_ctrl(adreno_dev);
return gen7_hfi_send_clx_v2_feature_ctrl(adreno_dev);
}
#define EVENT_PWR_ACD_THROTTLE_PROF 44
int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
int ret = 0;
if (adreno_dev->acd_enabled) {
ret = gen7_hfi_send_feature_ctrl(adreno_dev,
HFI_FEATURE_ACD, 1, 0);
if (ret)
return ret;
ret = gen7_hfi_send_generic_req(adreno_dev,
&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
if (ret)
return ret;
gen7_hfi_send_set_value(adreno_dev, HFI_VALUE_LOG_EVENT_ON,
EVENT_PWR_ACD_THROTTLE_PROF, 0);
}
return 0;
}
int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
if (gmu->idle_level == GPU_HW_IFPC)
return gen7_hfi_send_feature_ctrl(adreno_dev,
HFI_FEATURE_IFPC, 1, adreno_dev->ifpc_hyst);
return 0;
}
static void reset_hfi_queues(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
struct hfi_queue_table *tbl = mem_addr->hostptr;
struct hfi_queue_header *hdr;
unsigned int i;
/* Flush HFI queues */
for (i = 0; i < HFI_QUEUE_MAX; i++) {
hdr = &tbl->qhdr[i];
if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
continue;
hdr->read_index = hdr->write_index;
}
}
/* Fill the entry and return the dword count written */
static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count,
u32 stride_bytes, u32 *data)
{
entry->count = count;
entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */
memcpy(entry->data, data, stride_bytes * count);
/* Return total dword count of entry + data */
return (sizeof(*entry) >> 2) + (entry->count * entry->stride);
}
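/*
 * Worked example with hypothetical sizes: for count = 4 descriptors of
 * 24 bytes each, entry->stride = 24 >> 2 = 6 dwords, 4 * 24 bytes of payload
 * are copied, and the function returns (sizeof(*entry) >> 2) + 4 * 6 dwords,
 * which the caller uses to advance its write offset in cmd_buf.
 */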
int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev)
{
/*
* Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd.
* Current max size for either is 165 dwords.
*/
static u32 cmd_buf[200];
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct gen7_dcvs_table *tbl = &gmu->dcvs_table;
int ret = 0;
/* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */
if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) {
struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0];
u32 dword_off;
/* Already setup, so just send cmd */
if (cmd->hdr)
return gen7_hfi_send_generic_req(adreno_dev, cmd,
MSG_HDR_GET_SIZE(cmd->hdr) << 2);
if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS)
return -EINVAL;
/* CMD starts with struct hfi_table_cmd data */
cmd->type = HFI_TABLE_GPU_PERF;
dword_off = sizeof(*cmd) >> 2;
/* Fill in the table entry and data starting at dword_off */
dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
tbl->gpu_level_num, sizeof(struct opp_gx_desc),
(u32 *)tbl->gx_votes);
/* Fill in the table entry and data starting at dword_off */
dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
tbl->gmu_level_num, sizeof(struct opp_desc),
(u32 *)tbl->cx_votes);
cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD);
cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off);
ret = gen7_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2);
} else {
struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0];
/* Already setup, so just send cmd */
if (cmd->hdr)
return gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS)
return -EINVAL;
ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL);
if (ret)
return ret;
cmd->gpu_level_num = tbl->gpu_level_num;
cmd->gmu_level_num = tbl->gmu_level_num;
memcpy(&cmd->gx_votes, tbl->gx_votes,
sizeof(struct opp_gx_desc) * cmd->gpu_level_num);
memcpy(&cmd->cx_votes, tbl->cx_votes,
sizeof(struct opp_desc) * cmd->gmu_level_num);
ret = gen7_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
}
return ret;
}
int gen7_hfi_start(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int result;
reset_hfi_queues(adreno_dev);
result = gen7_hfi_send_gpu_perf_table(adreno_dev);
if (result)
goto err;
result = gen7_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
sizeof(gmu->hfi.bw_table));
if (result)
goto err;
result = gen7_hfi_send_acd_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen7_hfi_send_bcl_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen7_hfi_send_clx_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen7_hfi_send_ifpc_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen7_hfi_send_core_fw_start(adreno_dev);
if (result)
goto err;
set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
/* Request default DCVS level */
result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
if (result)
goto err;
/* Request default BW vote */
result = kgsl_pwrctrl_axi(device, true);
err:
if (result)
gen7_hfi_stop(adreno_dev);
return result;
}
void gen7_hfi_stop(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
kgsl_pwrctrl_axi(device, false);
clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
}
/* HFI interrupt handler */
irqreturn_t gen7_hfi_irq_handler(int irq, void *data)
{
struct kgsl_device *device = data;
struct gen7_gmu_device *gmu = to_gen7_gmu(ADRENO_DEVICE(device));
unsigned int status = 0;
gmu_core_regread(device, GEN7_GMU_GMU2HOST_INTR_INFO, &status);
gmu_core_regwrite(device, GEN7_GMU_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);
if (status & HFI_IRQ_DBGQ_MASK)
gen7_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
if (status & HFI_IRQ_CM3_FAULT_MASK) {
dev_err_ratelimited(&gmu->pdev->dev,
"GMU CM3 fault interrupt received\n");
atomic_set(&gmu->cm3_fault, 1);
/* make sure other CPUs see the update */
smp_wmb();
}
if (status & ~HFI_IRQ_MASK)
dev_err_ratelimited(&gmu->pdev->dev,
"Unhandled HFI interrupts 0x%lx\n",
status & ~HFI_IRQ_MASK);
return IRQ_HANDLED;
}
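/*
 * Note (sketch, not driver code): the smp_wmb() in the CM3 fault path above
 * is presumably paired with a read barrier on the consumer side before the
 * flag is inspected, along the lines of:
 *
 *	smp_rmb();
 *	if (atomic_read(&gmu->cm3_fault))
 *		...bail out of further GMU/HFI traffic...
 */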

View File

@ -0,0 +1,234 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN7_HFI_H
#define __ADRENO_GEN7_HFI_H
#include "adreno_hfi.h"
/**
* struct gen7_hfi - HFI control structure
*/
struct gen7_hfi {
/** @irq: HFI interrupt line */
int irq;
/**
 * @seqnum: Atomic counter incremented for each message sent. Its value is
 * used as the sequence number for HFI messages.
 */
atomic_t seqnum;
/** @hfi_mem: Memory descriptor for the hfi memory */
struct kgsl_memdesc *hfi_mem;
/** @bw_table: HFI BW table buffer */
struct hfi_bwtable_cmd bw_table;
/** @acd_table: HFI table for ACD data */
struct hfi_acd_table_cmd acd_table;
/** @cmdq_lock: Spinlock for accessing the cmdq */
spinlock_t cmdq_lock;
/**
* @wb_set_record_bitmask: Bitmask to enable or disable the recording
* of messages in the GMU scratch.
*/
unsigned long wb_set_record_bitmask[BITS_TO_LONGS(HFI_MAX_ID)];
};
struct gen7_gmu_device;
/* gen7_hfi_irq_handler - IRQ handler for HFI interrupts */
irqreturn_t gen7_hfi_irq_handler(int irq, void *data);
/**
* gen7_hfi_start - Send the various HFIs during device boot up
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_start(struct adreno_device *adreno_dev);
/**
 * gen7_hfi_stop - Stop the HFI resources when powering down the GMU
 * @adreno_dev: Pointer to the adreno device
*/
void gen7_hfi_stop(struct adreno_device *adreno_dev);
/**
* gen7_hfi_init - Initialize hfi resources
* @adreno_dev: Pointer to the adreno device
*
* This function allocates and sets up hfi queues
* when a process creates the very first kgsl instance
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_init(struct adreno_device *adreno_dev);
/* Helper function to get to gen7 hfi struct from adreno device */
struct gen7_hfi *to_gen7_hfi(struct adreno_device *adreno_dev);
/**
* gen7_hfi_queue_write - Write a command to hfi queue
* @adreno_dev: Pointer to the adreno device
* @queue_idx: destination queue id
* @msg: Data to be written to the queue
* @size_bytes: Size of the command in bytes
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
u32 *msg, u32 size_bytes);
/**
* gen7_hfi_queue_read - Read data from hfi queue
* @gmu: Pointer to the gen7 gmu device
* @queue_idx: queue id to read from
* @output: Pointer to read the data into
* @max_size: Number of bytes to read from the queue
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_queue_read(struct gen7_gmu_device *gmu, u32 queue_idx,
u32 *output, u32 max_size);
/**
* gen7_receive_ack_cmd - Process ack type packets
* @gmu: Pointer to the gen7 gmu device
* @rcvd: Pointer to the data read from hfi queue
* @ret_cmd: Container for the hfi packet for which this ack is received
*
* Return: 0 on success or negative error on failure
*/
int gen7_receive_ack_cmd(struct gen7_gmu_device *gmu, void *rcvd,
struct pending_cmd *ret_cmd);
/**
 * gen7_hfi_send_feature_ctrl - Enable or disable a GMU feature via HFI
 * @adreno_dev: Pointer to the adreno device
 * @feature: Feature to be enabled or disabled
 * @enable: Set 1 to enable or 0 to disable a feature
* @data: payload for the send feature hfi packet
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
u32 feature, u32 enable, u32 data);
/**
* gen7_hfi_send_get_value - Send gmu get_values via hfi
* @adreno_dev: Pointer to the adreno device
* @type: GMU get_value type
* @subtype: GMU get_value subtype
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype);
/**
* gen7_hfi_send_set_value - Send gmu set_values via hfi
* @adreno_dev: Pointer to the adreno device
* @type: GMU set_value type
* @subtype: GMU set_value subtype
* @data: Value to set
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_set_value(struct adreno_device *adreno_dev,
u32 type, u32 subtype, u32 data);
/**
* gen7_hfi_send_core_fw_start - Send the core fw start hfi
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
/**
* gen7_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen7_hfi_send_generic_req - Send a generic hfi packet
* @adreno_dev: Pointer to the adreno device
* @cmd: Pointer to the hfi packet header and data
* @size_bytes: Size of the packet in bytes
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);
/**
* gen7_hfi_send_generic_req_v5 - Send a generic hfi packet with additional error handling
* @adreno_dev: Pointer to the adreno device
* @cmd: Pointer to the hfi packet header and data
* @ret_cmd: Ack for the command we just sent
* @size_bytes: Size of the packet in bytes
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
struct pending_cmd *ret_cmd, u32 size_bytes);
/**
* gen7_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen7_hfi_send_clx_feature_ctrl - Send the clx feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev);
/**
 * gen7_hfi_send_ifpc_feature_ctrl - Send the ifpc feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen7_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev);
/**
* gen7_hfi_process_queue - Check hfi queue for messages from gmu
* @gmu: Pointer to the gen7 gmu device
* @queue_idx: queue id to be processed
* @ret_cmd: Container for data needed for waiting for the ack
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_process_queue(struct gen7_gmu_device *gmu,
u32 queue_idx, struct pending_cmd *ret_cmd);
/**
* gen7_hfi_cmdq_write - Write a command to command queue
* @adreno_dev: Pointer to the adreno device
* @msg: Data to be written to the queue
* @size_bytes: Size of the command in bytes
*
* This function takes the cmdq lock before writing data to the queue
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
void adreno_gen7_receive_err_req(struct gen7_gmu_device *gmu, void *rcvd);
void adreno_gen7_receive_debug_req(struct gen7_gmu_device *gmu, void *rcvd);
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,106 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_GEN7_HWSCHED_H_
#define _ADRENO_GEN7_HWSCHED_H_
#include "adreno_gen7_hwsched_hfi.h"
/**
* struct gen7_hwsched_device - Container for the gen7 hwscheduling device
*/
struct gen7_hwsched_device {
/** @gen7_dev: Container for the gen7 device */
struct gen7_device gen7_dev;
/** @hwsched_hfi: Container for hwscheduling specific hfi resources */
struct gen7_hwsched_hfi hwsched_hfi;
};
/**
* gen7_hwsched_probe - Target specific probe for hwsched
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target specific probe function for hwsched enabled gmu targets.
*
* Return: 0 on success or negative error on failure
*/
int gen7_hwsched_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* gen7_hwsched_reset_replay - Restart the gmu and gpu and replay inflight cmdbatches
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hwsched_reset_replay(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_snapshot - take gen7 hwsched snapshot
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot instance
*
 * Snapshot the faulty IB and then snapshot the rest of the gen7 GMU state
*/
void gen7_hwsched_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* gen7_hwsched_handle_watchdog - Handle watchdog interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen7_hwsched_handle_watchdog(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_active_count_get - Increment the active count
* @adreno_dev: Pointer to the adreno device
*
* This function increments the active count. If active count
* is 0, this function also powers up the device.
*
* Return: 0 on success or negative error on failure
*/
int gen7_hwsched_active_count_get(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_active_count_put - Put back the active count
* @adreno_dev: Pointer to the adreno device
*
 * This function decrements the active count and sets the idle
 * timer if the active count reaches zero.
*/
void gen7_hwsched_active_count_put(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_add_to_minidump - Register hwsched_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int gen7_hwsched_add_to_minidump(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU
* @adreno_dev: Pointer to adreno device structure
* @cmdobj: The command object which needs to be submitted
*
* This function is used to register the context if needed and submit
 * recurring IBs to the GMU. Upon receiving the IPC interrupt, the GMU will
 * submit the recurring IBs to the GPU.
* Return: 0 on success and negative error on failure
*/
int gen7_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj);
/**
* gen7_hwsched_fault - Set hwsched fault to request recovery
* @adreno_dev: A handle to adreno device
* @fault: The type of fault
*/
void gen7_hwsched_fault(struct adreno_device *adreno_dev, u32 fault);
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,363 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_GEN7_HWSCHED_HFI_H_
#define _ADRENO_GEN7_HWSCHED_HFI_H_
/* Maximum number of IBs in a submission */
#define HWSCHED_MAX_NUMIBS \
((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \
/ sizeof(struct hfi_issue_ib))
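/*
 * Worked example with hypothetical sizes: if HFI_MAX_MSG_SIZE were 4096
 * bytes, offsetof(struct hfi_issue_cmd_cmd, ibs) were 48 bytes and
 * sizeof(struct hfi_issue_ib) were 16 bytes, this would evaluate to
 * (4096 - 48) / 16 = 253 IBs per submission. The real limit depends on the
 * actual structure layouts in adreno_hfi.h.
 */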
/*
* This is used to put userspace threads to sleep when hardware fence unack count reaches a
* threshold. This bit is cleared in two scenarios:
* 1. If the hardware fence unack count drops to a desired threshold.
 * 2. If there is a GMU/GPU fault, because we don't want the threads to keep
 *    sleeping through fault recovery, which can easily take hundreds of
 *    milliseconds to complete.
*/
#define GEN7_HWSCHED_HW_FENCE_SLEEP_BIT 0x0
/*
* This is used to avoid creating any more hardware fences until the hardware fence unack count
* drops to a desired threshold. This bit is required in cases where GEN7_HWSCHED_HW_FENCE_SLEEP_BIT
* will be cleared, but we still want to avoid creating any more hardware fences. For example, if
* hardware fence unack count reaches a maximum threshold, both GEN7_HWSCHED_HW_FENCE_SLEEP_BIT and
* GEN7_HWSCHED_HW_FENCE_MAX_BIT will be set. Say, a GMU/GPU fault happens and
* GEN7_HWSCHED_HW_FENCE_SLEEP_BIT will be cleared to wake up any sleeping threads. But,
* GEN7_HWSCHED_HW_FENCE_MAX_BIT will remain set to avoid creating any new hardware fences until
* recovery is complete and deferred drawctxt (if any) is handled.
*/
#define GEN7_HWSCHED_HW_FENCE_MAX_BIT 0x1
/*
* This is used to avoid creating any more hardware fences until concurrent reset/recovery completes
*/
#define GEN7_HWSCHED_HW_FENCE_ABORT_BIT 0x2
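/*
 * These values are bit numbers (0/1/2), presumably for the hw_fence.flags
 * bitmap in struct gen7_hwsched_hfi below, so they would be operated on with
 * the standard bitops, e.g. (sketch only):
 *
 *	set_bit(GEN7_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags);
 *	if (test_bit(GEN7_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags))
 *		...put the caller to sleep until the unack count drops...
 */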
struct gen7_hwsched_hfi {
struct hfi_mem_alloc_entry mem_alloc_table[32];
u32 mem_alloc_entries;
/** @irq_mask: Store the hfi interrupt mask */
u32 irq_mask;
/** @msglock: To protect the list of un-ACKed hfi packets */
rwlock_t msglock;
/** @msglist: List of un-ACKed hfi packets */
struct list_head msglist;
/** @f2h_task: Task for processing gmu fw to host packets */
struct task_struct *f2h_task;
/** @f2h_wq: Waitqueue for the f2h_task */
wait_queue_head_t f2h_wq;
/** @big_ib: GMU buffer to hold big IBs */
struct kgsl_memdesc *big_ib;
/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
struct kgsl_memdesc *big_ib_recurring;
/** @perfctr_scratch: Buffer to hold perfcounter PM4 commands */
struct kgsl_memdesc *perfctr_scratch;
/** @msgq_mutex: Mutex for accessing the msgq */
struct mutex msgq_mutex;
struct {
/** @lock: Spinlock for managing hardware fences */
spinlock_t lock;
/**
* @unack_count: Number of hardware fences sent to GMU but haven't yet been ack'd
* by GMU
*/
u32 unack_count;
/**
* @unack_wq: Waitqueue to wait on till number of unacked hardware fences drops to
* a desired threshold
*/
wait_queue_head_t unack_wq;
/**
 * @defer_drawctxt: Drawctxt to send hardware fences from as soon as the
 * unacked hardware fence count drops to a desired threshold
*/
struct adreno_context *defer_drawctxt;
/**
* @defer_ts: The timestamp of the hardware fence which got deferred
*/
u32 defer_ts;
/**
* @flags: Flags to control the creation of new hardware fences
*/
unsigned long flags;
/** @seqnum: Sequence number for hardware fence packet header */
atomic_t seqnum;
} hw_fence;
/**
 * @hw_fence_timer: Timer to trigger a fault if the unack'd hardware fence
 * count doesn't drop to a desired threshold in a given amount of time
*/
struct timer_list hw_fence_timer;
/**
* @hw_fence_ws: Work struct that gets scheduled when hw_fence_timer expires
*/
struct work_struct hw_fence_ws;
/** @detached_hw_fence_list: List of hardware fences belonging to detached contexts */
struct list_head detached_hw_fence_list;
/** @defer_hw_fence_work: The work structure to send deferred hardware fences to GMU */
struct kthread_work defer_hw_fence_work;
};
struct kgsl_drawobj_cmd;
/**
* gen7_hwsched_hfi_probe - Probe hwsched hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* Return: 0 on success and negative error on failure.
*/
int gen7_hwsched_hfi_probe(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_hfi_remove - Release hwsched hfi resources
* @adreno_dev: Pointer to adreno device structure
*/
void gen7_hwsched_hfi_remove(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_hfi_init - Initialize hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to initialize hfi resources
* once before the very first gmu boot
*
* Return: 0 on success and negative error on failure.
*/
int gen7_hwsched_hfi_init(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_hfi_start - Start hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* Send the various hfi packets before booting the gpu
*
* Return: 0 on success and negative error on failure.
*/
int gen7_hwsched_hfi_start(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_hfi_stop - Stop the hfi resources
* @adreno_dev: Pointer to the adreno device
*
* This function does the hfi cleanup when powering down the gmu
*/
void gen7_hwsched_hfi_stop(struct adreno_device *adreno_dev);
/**
 * gen7_hwsched_cp_init - Send CP_INIT via HFI
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to send CP INIT packet and bring
* GPU out of secure mode using hfi raw packets.
*
* Return: 0 on success and negative error on failure.
*/
int gen7_hwsched_cp_init(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_counter_inline_enable - Configure a performance counter for a countable
 * @adreno_dev: Adreno device to configure
 * @group: Desired performance counter group
 * @counter: Desired performance counter in the group
 * @countable: Desired countable
 *
 * Physically set up a counter within a group with the desired countable.
 *
 * Return: 0 on success or negative error on failure.
*/
int gen7_hwsched_counter_inline_enable(struct adreno_device *adreno_dev,
const struct adreno_perfcount_group *group,
u32 counter, u32 countable);
/**
* gen7_hfi_send_cmd_async - Send an hfi packet
* @adreno_dev: Pointer to adreno device structure
* @data: Data to be sent in the hfi packet
* @size_bytes: Size of the packet in bytes
*
 * Send data in the form of an HFI packet to the GMU and wait for
 * its ack asynchronously
*
* Return: 0 on success and negative error on failure.
*/
int gen7_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
/**
* gen7_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
* @adreno_dev: Pointer to adreno device structure
* @drawobj: The command draw object which needs to be submitted
*
* This function is used to register the context if needed and submit
* IBs to the hfi dispatch queues.
* Return: 0 on success and negative error on failure
*/
int gen7_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
struct kgsl_drawobj *drawobj);
/**
* gen7_hwsched_context_detach - Unregister a context with GMU
* @drawctxt: Pointer to the adreno context
*
* This function sends context unregister HFI and waits for the ack
* to ensure all submissions from this context have retired
*/
void gen7_hwsched_context_detach(struct adreno_context *drawctxt);
/* Helper function to get to gen7 hwsched hfi device from adreno device */
struct gen7_hwsched_hfi *to_gen7_hwsched_hfi(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_preempt_count_get - Get preemption count from GMU
* @adreno_dev: Pointer to adreno device
*
* This function sends a GET_VALUE HFI packet to get the number of
* preemptions completed since last SLUMBER exit.
*
* Return: Preemption count
*/
u32 gen7_hwsched_preempt_count_get(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_lpac_cp_init - Send CP_INIT to LPAC via HFI
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to send CP INIT packet to LPAC and
* enable submission to LPAC queue.
*
* Return: 0 on success and negative error on failure.
*/
int gen7_hwsched_lpac_cp_init(struct adreno_device *adreno_dev);
/**
* gen7_hfi_send_lpac_feature_ctrl - Send the lpac feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen7_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_context_destroy - Destroy any hwsched related resources during context destruction
* @adreno_dev: Pointer to adreno device
* @drawctxt: Pointer to the adreno context
*
 * This function destroys any hwsched related resources when this context is destroyed
*/
void gen7_hwsched_context_destroy(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt);
/**
* gen7_hwsched_hfi_get_value - Send GET_VALUE packet to GMU to get the value of a property
* @adreno_dev: Pointer to adreno device
* @prop: property to get from GMU
*
 * This function sends a GET_VALUE HFI packet to query the value of a property
*
* Return: On success, return the value in the GMU response. On failure, return 0
*/
u32 gen7_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop);
/**
* gen7_send_hw_fence_hfi_wait_ack - Send hardware fence info to GMU
* @adreno_dev: Pointer to adreno device
* @entry: Pointer to the adreno hardware fence entry
* @flags: Flags for this hardware fence
*
* Send the hardware fence info to the GMU and wait for the ack
*
* Return: 0 on success or negative error on failure
*/
int gen7_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev,
struct adreno_hw_fence_entry *entry, u64 flags);
/**
* gen7_hwsched_create_hw_fence - Create a hardware fence
* @adreno_dev: Pointer to adreno device
* @kfence: Pointer to the kgsl fence
*
* Create a hardware fence, set up hardware fence info and send it to GMU if required
*/
void gen7_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
struct kgsl_sync_fence *kfence);
/**
* gen7_hwsched_drain_context_hw_fences - Drain context's hardware fences via GMU
* @adreno_dev: Pointer to adreno device
* @drawctxt: Pointer to the adreno context which is to be flushed
*
* Trigger hardware fences that were never dispatched to GMU
*
* Return: Zero on success or negative error on failure
*/
int gen7_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt);
/**
* gen7_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences
* from a context have been sent to the TxQueue or not
* @adreno_dev: Pointer to adreno device
* @drawctxt: Pointer to the adreno context which is to be flushed
*
* Check if all hardware fences from this context have been sent to the
 * TxQueue. If not, log an error and return an error code.
*
* Return: Zero on success or negative error on failure
*/
int gen7_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt);
/**
* gen7_remove_hw_fence_entry - Remove hardware fence entry
* @adreno_dev: pointer to the adreno device
* @entry: Pointer to the hardware fence entry
*/
void gen7_remove_hw_fence_entry(struct adreno_device *adreno_dev,
struct adreno_hw_fence_entry *entry);
/**
* gen7_trigger_hw_fence_cpu - Trigger hardware fence from cpu
* @adreno_dev: pointer to the adreno device
* @fence: hardware fence entry to be triggered
*
* Trigger the hardware fence by sending it to GMU's TxQueue and raise the
* interrupt from GMU to APPS
*/
void gen7_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
struct adreno_hw_fence_entry *fence);
/**
* gen7_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset
* @adreno_dev: pointer to the adreno device
*
 * After device reset, clear hardware fence related data structures, send any
 * hardware fences that got deferred prior to reset, and re-open the gates for
 * hardware fence creation
*
* Return: Zero on success or negative error on failure
*/
int gen7_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_process_msgq - Process msgq
* @adreno_dev: pointer to the adreno device
*
* This function grabs the msgq mutex and processes msgq for any outstanding hfi packets
*/
void gen7_hwsched_process_msgq(struct adreno_device *adreno_dev);
/**
* gen7_hwsched_boot_gpu - Send the command to boot GPU
* @adreno_dev: Pointer to adreno device
*
 * Send the hfi to boot the GPU and check the ack. In case of a failure,
 * take a snapshot and capture registers of interest.
*
* Return: Zero on success or negative error on failure
*/
int gen7_hwsched_boot_gpu(struct adreno_device *adreno_dev);
#endif

File diff suppressed because it is too large

View File

@ -0,0 +1,802 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_gen7.h"
#include "adreno_pm4types.h"
#include "adreno_trace.h"
#define PREEMPT_RECORD(_field) \
offsetof(struct gen7_cp_preemption_record, _field)
#define PREEMPT_SMMU_RECORD(_field) \
offsetof(struct gen7_cp_smmu_info, _field)
static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer,
bool atomic)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&rb->preempt_lock, flags);
if (!atomic) {
/*
* We might have skipped updating the wptr in case we are in
* dispatcher context. Do it now.
*/
if (rb->skip_inline_wptr) {
ret = gen7_fenced_write(adreno_dev,
GEN7_CP_RB_WPTR, rb->wptr,
FENCE_STATUS_WRITEDROPPED0_MASK);
reset_timer = true;
rb->skip_inline_wptr = false;
}
} else {
unsigned int wptr;
kgsl_regread(device, GEN7_CP_RB_WPTR, &wptr);
if (wptr != rb->wptr) {
kgsl_regwrite(device, GEN7_CP_RB_WPTR, rb->wptr);
reset_timer = true;
}
}
if (reset_timer)
rb->dispatch_q.expires = jiffies +
msecs_to_jiffies(adreno_drawobj_timeout);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!atomic) {
/* If WPTR update fails, set the fault and trigger recovery */
if (ret) {
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
}
}
}
static void _power_collapse_set(struct adreno_device *adreno_dev, bool val)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
gmu_core_regwrite(device,
GEN7_GMU_PWR_COL_PREEMPT_KEEPALIVE, (val ? 1 : 0));
}
static void _gen7_preemption_done(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
/*
* In the very unlikely case that the power is off, do nothing - the
* state will be reset on power up and everybody will be happy
*/
if (!kgsl_state_is_awake(device))
return;
kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
if (status & 0x1) {
dev_err(device->dev,
"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
status, adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
/* Set a fault and restart */
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
return;
}
adreno_dev->preempt.count++;
del_timer_sync(&adreno_dev->preempt.timer);
kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
status, 0);
/* Clean up all the bits */
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr for the new command queue */
_update_wptr(adreno_dev, true, false);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
/* Clear the preempt state */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
}
static void _gen7_preemption_fault(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
/*
* If the power is on check the preemption status one more time - if it
* was successful then just transition to the complete state
*/
if (kgsl_state_is_awake(device)) {
kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
if (!(status & 0x1)) {
adreno_set_preempt_state(adreno_dev,
ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(device);
return;
}
}
dev_err(device->dev,
"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
}
static void _gen7_preemption_worker(struct work_struct *work)
{
struct adreno_preemption *preempt = container_of(work,
struct adreno_preemption, work);
struct adreno_device *adreno_dev = container_of(preempt,
struct adreno_device, preempt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/* Need to take the mutex to make sure that the power stays on */
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
_gen7_preemption_fault(adreno_dev);
mutex_unlock(&device->mutex);
}
/* Find the highest priority active ringbuffer */
static struct adreno_ringbuffer *gen7_next_ringbuffer(
struct adreno_device *adreno_dev)
{
struct adreno_ringbuffer *rb;
unsigned long flags;
unsigned int i;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
bool empty;
spin_lock_irqsave(&rb->preempt_lock, flags);
empty = adreno_rb_empty(rb);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!empty)
return rb;
}
return NULL;
}
void gen7_preemption_trigger(struct adreno_device *adreno_dev, bool atomic)
{
const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *next;
u64 ttbr0, gpuaddr;
u32 contextidr, cntl;
unsigned long flags;
struct adreno_preemption *preempt = &adreno_dev->preempt;
/* Put ourselves into a possible trigger state */
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
return;
/* Get the next ringbuffer to preempt in */
next = gen7_next_ringbuffer(adreno_dev);
/*
* Nothing to do if every ringbuffer is empty or if the current
* ringbuffer is the only active one
*/
if (next == NULL || next == adreno_dev->cur_rb) {
/*
* Update any critical things that might have been skipped while
* we were looking for a new ringbuffer
*/
if (next != NULL) {
_update_wptr(adreno_dev, false, atomic);
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
return;
}
/* Turn off the dispatcher timer */
del_timer(&adreno_dev->dispatcher.timer);
/*
* This is the most critical section - we need to take care not to race
* until we have programmed the CP for the switch
*/
spin_lock_irqsave(&next->preempt_lock, flags);
/* Get the pagetable from the pagetable info. */
kgsl_sharedmem_readq(device->scratch, &ttbr0,
SCRATCH_RB_OFFSET(next->id, ttbr0));
kgsl_sharedmem_readl(device->scratch, &contextidr,
SCRATCH_RB_OFFSET(next->id, contextidr));
kgsl_sharedmem_writel(next->preemption_desc,
PREEMPT_RECORD(wptr), next->wptr);
spin_unlock_irqrestore(&next->preempt_lock, flags);
/* And write it to the smmu info */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), contextidr);
}
kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
next->id * sizeof(u64));
/*
* Set a keepalive bit before the first preemption register write.
* This is required since while each individual write to the context
* switch registers will wake the GPU from collapse, it will not in
* itself cause GPU activity. Thus, the GPU could technically be
* re-collapsed between subsequent register writes leading to a
* prolonged preemption sequence. The keepalive bit prevents any
* further power collapse while it is set.
* It is more efficient to use a keepalive+wake-on-fence approach here
* rather than an OOB. Both keepalive and the fence are effectively
* free when the GPU is already powered on, whereas an OOB requires an
* unconditional handshake with the GMU.
*/
_power_collapse_set(adreno_dev, true);
/*
* Fenced writes on this path will make sure the GPU is woken up
* in case it was power collapsed by the GMU.
*/
if (gen7_fenced_write(adreno_dev,
GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
lower_32_bits(next->preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
/*
 * The above fenced writes make sure the GMU comes out of
 * IFPC state if it was in IFPC state, but they don't
 * guarantee that the GMU FW actually moved to ACTIVE state,
 * i.e. that wake-up from IFPC is complete.
 * Wait for the GMU to move to ACTIVE state before triggering
 * preemption. This is required to make sure the CP doesn't
 * interrupt the GMU during wake-up from IFPC.
*/
if (!atomic && gmu_core_dev_wait_for_active_transition(device))
goto err;
if (gen7_fenced_write(adreno_dev,
GEN7_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
upper_32_bits(next->preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen7_fenced_write(adreno_dev,
GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO,
lower_32_bits(next->secure_preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen7_fenced_write(adreno_dev,
GEN7_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI,
upper_32_bits(next->secure_preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen7_fenced_write(adreno_dev,
GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
lower_32_bits(gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen7_fenced_write(adreno_dev,
GEN7_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
upper_32_bits(gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
adreno_dev->next_rb = next;
/* Start the timer to detect a stuck preemption */
mod_timer(&adreno_dev->preempt.timer,
jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
cntl = (preempt->preempt_level << 6) | 0x01;
/* Skip save/restore during L1 preemption */
if (preempt->skipsaverestore)
cntl |= (1 << 9);
/* Enable GMEM save/restore across preemption */
if (preempt->usesgmem)
cntl |= (1 << 8);
trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
cntl, 0);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
if (gen7_core->qos_value)
kgsl_sharedmem_writel(preempt->scratch,
PREEMPT_SCRATCH_OFFSET(QOS_VALUE_IDX),
gen7_core->qos_value[next->id]);
/* Trigger the preemption */
if (gen7_fenced_write(adreno_dev, GEN7_CP_CONTEXT_SWITCH_CNTL, cntl,
FENCE_STATUS_WRITEDROPPED1_MASK)) {
adreno_dev->next_rb = NULL;
del_timer(&adreno_dev->preempt.timer);
goto err;
}
return;
err:
/* If fenced write fails, take inline snapshot and trigger recovery */
if (!in_interrupt()) {
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
} else {
adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
/* Clear the keep alive */
_power_collapse_set(adreno_dev, false);
}
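/*
 * Rough shape of the keepalive bracket above (illustration only):
 *
 *	_power_collapse_set(adreno_dev, true);	// block power collapse
 *	...fenced restore-address writes + CP_CONTEXT_SWITCH_CNTL trigger...
 *	// cleared again either in gen7_preemption_callback() once the switch
 *	// completes, or in the err: path above
 *	_power_collapse_set(adreno_dev, false);
 */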
void gen7_preemption_callback(struct adreno_device *adreno_dev, int bit)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned int status;
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
return;
kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_CNTL, &status);
if (status & 0x1) {
dev_err(KGSL_DEVICE(adreno_dev)->dev,
"preempt interrupt with non-zero status: %X\n",
status);
/*
* Under the assumption that this is a race between the
* interrupt and the register, schedule the worker to clean up.
* If the status still hasn't resolved itself by the time we get
* there then we have to assume something bad happened
*/
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
return;
}
adreno_dev->preempt.count++;
/*
* We can now safely clear the preemption keepalive bit, allowing
* power collapse to resume its regular activity.
*/
_power_collapse_set(adreno_dev, false);
del_timer(&adreno_dev->preempt.timer);
kgsl_regread(device, GEN7_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
status, 0);
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr if it changed while preemption was ongoing */
_update_wptr(adreno_dev, true, true);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
gen7_preemption_trigger(adreno_dev, true);
}
void gen7_preemption_prepare_postamble(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
struct adreno_preemption *preempt = &adreno_dev->preempt;
u32 *postamble, count = 0;
/*
* First 28 dwords of the device scratch buffer are used to store shadow rb data.
* Reserve 15 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for
* KMD postamble pm4 packets. This should be in *device->scratch* so that userspace
* cannot access it.
*/
postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET;
/*
* Reserve 4 dwords in the scratch buffer for dynamic QOS control feature. To ensure QOS
* value is updated for first preemption, send it during bootup
*/
if (gen7_core->qos_value) {
postamble[count++] = cp_type7_packet(CP_MEM_TO_REG, 3);
postamble[count++] = GEN7_RBBM_GBIF_CLIENT_QOS_CNTL;
postamble[count++] = lower_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
postamble[count++] = upper_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
}
/*
* Since postambles are not preserved across slumber, necessary packets
* must be sent to GPU before first submission.
*
* If a packet needs to be sent before first submission, add it above this.
*/
preempt->postamble_bootup_len = count;
/* Reserve 11 dwords in the device scratch buffer to clear perfcounters */
if (!adreno_dev->perfcounter) {
postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
postamble[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_CMD;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
postamble[count++] = 0x3;
postamble[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_STATUS;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = 0x1;
postamble[count++] = 0x0;
}
preempt->postamble_len = count;
}
void gen7_preemption_schedule(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (!adreno_is_preemption_enabled(adreno_dev))
return;
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
_gen7_preemption_done(adreno_dev);
gen7_preemption_trigger(adreno_dev, false);
mutex_unlock(&device->mutex);
}
u32 gen7_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds)
{
u32 *cmds_orig = cmds;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags))
goto done;
*cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1);
*cmds++ = CP_SET_THREAD_BR;
*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
/* NULL SMMU_INFO buffer - we track in KMD */
*cmds++ = SET_PSEUDO_SMMU_INFO;
cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr);
*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds,
rb->secure_preemption_desc->gpuaddr);
/*
* There is no need to specify this address when we are about to
* trigger preemption. This is because CP internally stores this
* address specified here in the CP_SET_PSEUDO_REGISTER payload to
* the context record and thus knows from where to restore
* the saved perfcounters for the new ringbuffer.
*/
*cmds++ = SET_PSEUDO_COUNTER;
cmds += cp_gpuaddr(adreno_dev, cmds,
rb->perfcounter_save_restore_desc->gpuaddr);
done:
if (drawctxt) {
struct adreno_ringbuffer *rb = drawctxt->rb;
u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);
u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
cmds += cp_gpuaddr(adreno_dev, cmds, dest);
*cmds++ = lower_32_bits(gpuaddr);
*cmds++ = upper_32_bits(gpuaddr);
if (adreno_dev->preempt.postamble_len) {
u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
*cmds++ = lower_32_bits(kmd_postamble_addr);
*cmds++ = upper_32_bits(kmd_postamble_addr);
*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len));
}
}
return (unsigned int) (cmds - cmds_orig);
}
u32 gen7_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
u32 *cmds)
{
u32 index = 0;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (adreno_dev->cur_rb) {
u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id);
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4);
cmds[index++] = lower_32_bits(dest);
cmds[index++] = upper_32_bits(dest);
cmds[index++] = 0;
cmds[index++] = 0;
}
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = 1;
cmds[index++] = 0;
return index;
}
void gen7_preemption_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *rb;
unsigned int i;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
/* Force the state to be clear */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
if (kgsl_mmu_is_perprocess(&device->mmu)) {
/* smmu_info is allocated and mapped in gen7_preemption_iommu_init */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(magic), GEN7_CP_SMMU_INFO_MAGIC_REF);
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
/* The CP doesn't use the asid record, so poison it */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(asid), 0xdecafbad);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), 0);
kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
lower_32_bits(iommu->smmu_info->gpuaddr));
kgsl_regwrite(device, GEN7_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
upper_32_bits(iommu->smmu_info->gpuaddr));
}
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(rptr), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(wptr), 0);
adreno_ringbuffer_set_pagetable(device, rb,
device->mmu.defaultpagetable);
clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags);
}
}
static void reset_rb_preempt_record(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(magic), GEN7_CP_CTXRECORD_MAGIC_REF);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(cntl), GEN7_CP_RB_CNTL_DEFAULT);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(
KGSL_DEVICE(adreno_dev), rb->id, rptr));
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR(
KGSL_DEVICE(adreno_dev), rb->id, bv_rptr));
}
void gen7_reset_preempt_records(struct adreno_device *adreno_dev)
{
int i;
struct adreno_ringbuffer *rb;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
reset_rb_preempt_record(adreno_dev, rb);
}
}
static int gen7_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
u64 ctxt_record_size = GEN7_CP_CTXRECORD_SIZE_IN_BYTES;
int ret;
if (gen7_core->ctxt_record_size)
ctxt_record_size = gen7_core->ctxt_record_size;
ret = adreno_allocate_global(device, &rb->preemption_desc,
ctxt_record_size, SZ_16K, 0,
KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
if (ret)
return ret;
ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
ctxt_record_size, 0,
KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED,
"secure_preemption_desc");
if (ret)
return ret;
ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
GEN7_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
KGSL_MEMDESC_PRIVILEGED,
"perfcounter_save_restore_desc");
if (ret)
return ret;
reset_rb_preempt_record(adreno_dev, rb);
return 0;
}
int gen7_preemption_init(struct adreno_device *adreno_dev)
{
u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0;
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_preemption *preempt = &adreno_dev->preempt;
struct adreno_ringbuffer *rb;
int ret;
unsigned int i;
/* We are dependent on IOMMU to make preemption go on the CP side */
if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) {
ret = -ENODEV;
goto done;
}
INIT_WORK(&preempt->work, _gen7_preemption_worker);
/* Allocate mem for storing preemption switch record */
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
ret = gen7_preemption_ringbuffer_init(adreno_dev, rb);
if (ret)
goto done;
}
ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE,
0, 0, flags, "preempt_scratch");
if (ret)
goto done;
/* Allocate mem for storing preemption smmu record */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
"smmu_info");
if (ret)
goto done;
}
return 0;
done:
clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
return ret;
}
int gen7_preemption_context_init(struct kgsl_context *context)
{
struct kgsl_device *device = context->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
u64 flags = 0;
if (!adreno_preemption_feature_set(adreno_dev))
return 0;
if (context->flags & KGSL_CONTEXT_SECURE)
flags |= KGSL_MEMFLAGS_SECURE;
if (is_compat_task())
flags |= KGSL_MEMFLAGS_FORCE_32BIT;
/*
* gpumem_alloc_entry takes an extra refcount. Put it only when
* destroying the context to keep the context record valid
*/
context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
GEN7_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
if (IS_ERR(context->user_ctxt_record)) {
int ret = PTR_ERR(context->user_ctxt_record);
context->user_ctxt_record = NULL;
return ret;
}
return 0;
}

View File

@ -0,0 +1,647 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_gen7.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"
static bool is_concurrent_binning(struct adreno_context *drawctxt)
{
if (!drawctxt)
return false;
return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE);
}
static int gen7_rb_pagetable_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
struct kgsl_pagetable *pagetable, u32 *cmds)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
int count = 0;
u32 id = drawctxt ? drawctxt->base.id : 0;
if (pagetable == device->mmu.defaultpagetable)
return 0;
/* CP switches the pagetable and flushes the Caches */
cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
cmds[count++] = id;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
cmds[count++] = id;
/*
* Sync both threads after switching pagetables and enable BR only
* to make sure BV doesn't race ahead while BR is still switching
* pagetables.
*/
cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
return count;
}
static int gen7_rb_context_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_pagetable *pagetable =
adreno_drawctxt_get_pagetable(drawctxt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int count = 0;
u32 cmds[55];
/* Sync both threads */
cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH;
/* Reset context state */
cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1);
cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER |
CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS;
/*
* Enable/disable concurrent binning for pagetable switch and
* set the thread to BR since only BR can execute the pagetable
* switch packets.
*/
/* Sync both threads and enable BR only */
cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
/* Clear performance counters during context switches */
if (!adreno_dev->perfcounter) {
cmds[count++] = cp_type4_packet(GEN7_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
cmds[count++] = 0x1;
}
count += gen7_rb_pagetable_switch(adreno_dev, rb,
drawctxt, pagetable, &cmds[count]);
/* Wait for performance counter clear to finish */
if (!adreno_dev->perfcounter) {
cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
cmds[count++] = 0x3;
cmds[count++] = GEN7_RBBM_PERFCTR_SRAM_INIT_STATUS;
cmds[count++] = 0x0;
cmds[count++] = 0x1;
cmds[count++] = 0x1;
cmds[count++] = 0x0;
}
} else {
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
u32 offset = GEN7_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d;
/*
* Set the CONTEXTIDR register to the current context id so we
* can use it in pagefault debugging. Unlike TTBR0 we don't
* need any special sequence or locking to change it
*/
cmds[count++] = cp_type4_packet(offset, 1);
cmds[count++] = drawctxt->base.id;
}
cmds[count++] = cp_type7_packet(CP_NOP, 1);
cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[count++] = 0x31;
if (adreno_is_preemption_enabled(adreno_dev)) {
u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3);
cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
cmds[count++] = lower_32_bits(gpuaddr);
cmds[count++] = upper_32_bits(gpuaddr);
}
return gen7_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
cmds, count, 0, NULL);
}
#define RB_SOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
#define RB_EOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
int gen7_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time)
{
struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int ret = 0;
unsigned long flags;
adreno_get_submit_time(adreno_dev, rb, time);
adreno_profile_submit_time(time);
spin_lock_irqsave(&rb->preempt_lock, flags);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
if (adreno_dev->cur_rb == rb) {
kgsl_pwrscale_busy(device);
ret = gen7_fenced_write(adreno_dev,
GEN7_CP_RB_WPTR, rb->_wptr,
FENCE_STATUS_WRITEDROPPED0_MASK);
rb->skip_inline_wptr = false;
}
} else {
if (adreno_dev->cur_rb == rb)
rb->skip_inline_wptr = true;
}
rb->wptr = rb->_wptr;
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (ret) {
/*
* If WPTR update fails, take inline snapshot and trigger
* recovery.
*/
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
}
return ret;
}
int gen7_ringbuffer_init(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int i, ret;
ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
"scratch");
if (ret)
return ret;
adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
if (!adreno_preemption_feature_set(adreno_dev)) {
adreno_dev->num_ringbuffers = 1;
return adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[0], 0);
}
adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
int ret;
ret = adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[i], i);
if (ret)
return ret;
}
timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
gen7_preemption_init(adreno_dev);
return 0;
}
#define GEN7_SUBMIT_MAX 104
int gen7_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 size = GEN7_SUBMIT_MAX + dwords;
u32 *cmds, index = 0;
u64 profile_gpuaddr;
u32 profile_dwords;
if (adreno_drawctxt_detached(drawctxt))
return -ENOENT;
if (adreno_gpu_fault(adreno_dev) != 0)
return -EPROTO;
rb->timestamp++;
if (drawctxt)
drawctxt->internal_timestamp = rb->timestamp;
/* All submissions are run with protected mode off due to APRIV */
flags &= ~F_NOTPROTECTED;
cmds = adreno_ringbuffer_allocspace(rb, size);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
/* Identify the start of a command */
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
/* This is 25 dwords when drawctxt is not NULL and perfcounter needs to be zapped */
index += gen7_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
&cmds[index]);
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x101; /* IFPC disable */
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (drawctxt) {
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
}
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
if (IS_SECURE(flags)) {
/* Sync BV and BR if entering secure mode */
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE;
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 1;
}
memcpy(&cmds[index], in, dwords << 2);
index += dwords;
profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
drawctxt, &dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
if (is_concurrent_binning(drawctxt)) {
u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts);
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BV;
/*
* Make sure the timestamp is committed once BV pipe is
* completely done with this submission.
*/
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(27);
cmds[index++] = lower_32_bits(addr);
cmds[index++] = upper_32_bits(addr);
cmds[index++] = rb->timestamp;
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
/*
* This makes sure that BR doesn't race ahead and commit
* timestamp to memstore while BV is still processing
* this submission.
*/
cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4);
cmds[index++] = 0;
cmds[index++] = lower_32_bits(addr);
cmds[index++] = upper_32_bits(addr);
cmds[index++] = rb->timestamp;
}
/*
* If this is an internal command, just write the ringbuffer timestamp,
* otherwise, write both
*/
if (!drawctxt) {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
} else {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(27);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
}
if (IS_WFI(flags))
cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
if (IS_SECURE(flags)) {
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_CONCURRENT_BIN_DISABLE;
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 0;
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SYNC_THREADS;
}
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x100; /* IFPC enable */
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
/* 10 dwords */
index += gen7_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
/* Adjust the write pointer for the number of dwords we actually wrote */
rb->_wptr -= (size - index);
return gen7_ringbuffer_submit(rb, time);
}
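For context, the submission path above reserves worst-case space (GEN7_SUBMIT_MAX plus the payload dwords) and then returns the unused dwords by pulling the write pointer back. Below is a minimal user-space sketch of that over-allocate-then-trim pattern; the numbers are made up and the simple additions stand in for adreno_ringbuffer_allocspace().

#include <stdio.h>

int main(void)
{
	unsigned int wptr = 1000;	/* hypothetical ring write pointer (dwords) */
	unsigned int size = 104 + 12;	/* worst-case reservation: GEN7_SUBMIT_MAX + payload */
	unsigned int index = 90;	/* dwords actually written */

	wptr += size;			/* allocspace reserves the maximum */
	wptr -= (size - index);		/* give back the dwords that were not used */
	printf("final wptr = %u\n", wptr);
	return 0;
}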
static u32 gen7_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = GEN7_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
static u32 gen7_get_alwayson_context(u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = GEN7_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
static u64 gen7_get_user_profiling_ib(struct adreno_ringbuffer *rb,
struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
{
u32 offset, *ib, dwords;
if (IS_ERR(rb->profile_desc))
return 0;
offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
ib = rb->profile_desc->hostptr + offset;
dwords = gen7_get_alwayson_counter(ib,
cmdobj->profiling_buffer_gpuaddr + target_offset);
cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset);
cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset);
cmds[3] = dwords;
rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
return 4;
}
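gen7_get_user_profiling_ib() treats the profile buffer as a ring of fixed-size slots: each slot is PROFILE_IB_DWORDS (4) dwords, so a 4 KB page holds 256 of them and profile_index wraps with a simple modulo. A small sketch of that arithmetic, assuming a 4 KB PAGE_SIZE:

#include <stdio.h>

int main(void)
{
	const unsigned int page_size = 4096;	/* assumed PAGE_SIZE */
	const unsigned int ib_dwords = 4;	/* PROFILE_IB_DWORDS */
	const unsigned int slots = page_size / (ib_dwords << 2);
	unsigned int index = 255;

	printf("slots per page: %u\n", slots);
	printf("next index after %u: %u\n", index, (index + 1) % slots);
	return 0;
}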
static int gen7_drawctxt_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int ret;
if (rb->drawctxt_active == drawctxt)
return 0;
if (kgsl_context_detached(&drawctxt->base))
return -ENOENT;
if (!_kgsl_context_get(&drawctxt->base))
return -ENOENT;
ret = gen7_rb_context_switch(adreno_dev, rb, drawctxt);
if (ret) {
kgsl_context_put(&drawctxt->base);
return ret;
}
trace_adreno_drawctxt_switch(rb, drawctxt);
/* Release the current drawctxt as soon as the new one is switched */
adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
rb, rb->timestamp);
rb->drawctxt_active = drawctxt;
return 0;
}
#define GEN7_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
gen7_get_user_profiling_ib((rb), (cmdobj), \
offsetof(struct kgsl_drawobj_profiling_buffer, field), \
(cmds))
#define GEN7_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
gen7_get_alwayson_counter((cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define GEN7_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
gen7_get_alwayson_context((cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define GEN7_COMMAND_DWORDS 60
int gen7_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
struct adreno_ringbuffer *rb = drawctxt->rb;
int ret = 0, numibs = 0, index = 0;
u32 *cmds;
/* Count the number of IBs (if we are not skipping) */
if (!IS_SKIP(flags)) {
struct list_head *tmp;
list_for_each(tmp, &cmdobj->cmdlist)
numibs++;
}
cmds = kvmalloc((GEN7_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
if (!cmds) {
ret = -ENOMEM;
goto done;
}
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = START_IB_IDENTIFIER;
/* Kernel profiling: 8 dwords */
if (IS_KERNEL_PROFILE(flags)) {
index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
started);
index += GEN7_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
ctx_start);
}
/* User profiling: 4 dwords */
if (IS_USER_PROFILE(flags))
index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
gpu_ticks_submitted);
if (is_concurrent_binning(drawctxt)) {
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
}
if (numibs) {
struct kgsl_memobj_node *ib;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x00d; /* IB1LIST start */
list_for_each_entry(ib, &cmdobj->cmdlist, node) {
if (ib->priv & MEMOBJ_SKIP ||
(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE &&
!IS_PREAMBLE(flags)))
cmds[index++] = cp_type7_packet(CP_NOP, 4);
cmds[index++] =
cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(ib->gpuaddr);
cmds[index++] = upper_32_bits(ib->gpuaddr);
/* Double check that IB_PRIV is never set */
cmds[index++] = (ib->size >> 2) & 0xfffff;
}
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x00e; /* IB1LIST end */
}
if (is_concurrent_binning(drawctxt)) {
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
}
/* CCU invalidate depth */
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 24;
/* CCU invalidate color */
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 25;
/* 8 dwords */
if (IS_KERNEL_PROFILE(flags)) {
index += GEN7_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
retired);
index += GEN7_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
ctx_end);
}
/* 4 dwords */
if (IS_USER_PROFILE(flags))
index += GEN7_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
gpu_ticks_retired);
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = END_IB_IDENTIFIER;
ret = gen7_drawctxt_switch(adreno_dev, rb, drawctxt);
/*
* In the unlikely event of an error in the drawctxt switch,
* treat it like a hang
*/
if (ret) {
/*
* It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
* the upper layers know how to handle it
*/
if (ret != -ENOSPC && ret != -ENOENT)
dev_err(device->dev,
"Unable to switch draw context: %d\n", ret);
goto done;
}
adreno_drawobj_set_constraint(device, drawobj);
ret = gen7_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
flags, cmds, index, drawobj->timestamp, time);
done:
trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
kvfree(cmds);
return ret;
}

View File

@ -0,0 +1,519 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/types.h>
#include <soc/qcom/cmd-db.h>
#include <soc/qcom/tcs.h>
#include "adreno.h"
#include "adreno_gen7.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"
struct rpmh_arc_vals {
u32 num;
const u16 *val;
};
struct bcm {
const char *name;
u32 buswidth;
u32 channels;
u32 unit;
u16 width;
u8 vcd;
bool fixed;
};
struct bcm_data {
__le32 unit;
__le16 width;
u8 vcd;
u8 reserved;
};
struct rpmh_bw_votes {
u32 wait_bitmask;
u32 num_cmds;
u32 *addrs;
u32 num_levels;
u32 **cmds;
};
#define ARC_VOTE_SET(pri, sec, vlvl) \
(FIELD_PREP(GENMASK(31, 16), vlvl) | \
FIELD_PREP(GENMASK(15, 8), sec) | \
FIELD_PREP(GENMASK(7, 0), pri))
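ARC_VOTE_SET packs three fields into one dword: the primary-rail index in bits [7:0], the secondary-rail index in bits [15:8], and the VLVL value in bits [31:16]. A stand-alone sketch of the same packing with plain shifts (FIELD_PREP/GENMASK are kernel macros), using made-up index and VLVL values:

#include <stdio.h>
#include <stdint.h>

static uint32_t arc_vote_set(uint32_t pri, uint32_t sec, uint32_t vlvl)
{
	/* vlvl -> bits [31:16], sec -> bits [15:8], pri -> bits [7:0] */
	return ((vlvl & 0xffff) << 16) | ((sec & 0xff) << 8) | (pri & 0xff);
}

int main(void)
{
	/* e.g. primary index 4, secondary index 2, VLVL 224 (hypothetical values) */
	printf("vote = 0x%08x\n", (unsigned int)arc_vote_set(4, 2, 224));
	return 0;
}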
static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
{
size_t len = 0;
arc->val = cmd_db_read_aux_data(res_id, &len);
/*
* cmd_db_read_aux_data() gives us a zero-padded table of
* size len that contains the arc values. To determine the
* number of arc values, we loop through the table and count
* them until we get to the end of the buffer or hit the
* zero padding.
*/
for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0)
break;
}
return 0;
}
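The loop in rpmh_arc_cmds() walks the zero-padded table returned by cmd_db and stops at the first zero that follows a non-zero entry. A user-space sketch of the same counting, with a hypothetical VLVL table:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Count entries in a zero-padded VLVL table: stop at the first 0 that
 * follows a non-zero value, mirroring the loop in rpmh_arc_cmds(). */
static unsigned int count_arc_vals(const uint16_t *val, size_t len_bytes)
{
	unsigned int num;

	for (num = 1; num < (len_bytes >> 1); num++) {
		if (val[num - 1] != 0 && val[num] == 0)
			break;
	}
	return num;
}

int main(void)
{
	const uint16_t tbl[] = { 48, 64, 128, 192, 224, 0, 0, 0 };

	printf("%u ARC levels\n", count_arc_vals(tbl, sizeof(tbl)));
	return 0;
}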
static int setup_volt_dependency_tbl(u32 *votes,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
u16 *vlvl, unsigned int num_entries)
{
int i, j, k;
uint16_t cur_vlvl;
bool found_match;
/* i tracks current KGSL GPU frequency table entry
* j tracks secondary rail voltage table entry
* k tracks primary rail voltage table entry
*/
for (i = 0; i < num_entries; i++) {
found_match = false;
/* Look for a primary rail voltage that matches a VLVL level */
for (k = 0; k < pri_rail->num; k++) {
if (pri_rail->val[k] >= vlvl[i]) {
cur_vlvl = pri_rail->val[k];
found_match = true;
break;
}
}
/* If we did not find a matching VLVL level then abort */
if (!found_match)
return -EINVAL;
/*
* Look for a secondary rail index whose VLVL value
* is greater than or equal to the VLVL value of the
* corresponding index of the primary rail
*/
for (j = 0; j < sec_rail->num; j++) {
if (sec_rail->val[j] >= cur_vlvl ||
j + 1 == sec_rail->num)
break;
}
if (j == sec_rail->num)
j = 0;
votes[i] = ARC_VOTE_SET(k, j, cur_vlvl);
}
return 0;
}
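setup_volt_dependency_tbl() resolves each requested voltage by first finding the lowest primary-rail level at or above the requested VLVL, then the lowest secondary-rail level at or above that primary level. A simplified sketch of that matching step, with made-up rail tables and without the wrap/error handling of the driver code:

#include <stdio.h>
#include <stdint.h>

static int pick_rail_indexes(const uint16_t *pri, int pri_num,
		const uint16_t *sec, int sec_num, uint16_t vlvl,
		int *k_out, int *j_out)
{
	int j, k;

	/* First primary-rail level at or above the requested VLVL */
	for (k = 0; k < pri_num; k++)
		if (pri[k] >= vlvl)
			break;
	if (k == pri_num)
		return -1;

	/* First secondary-rail level at or above that primary level */
	for (j = 0; j < sec_num; j++)
		if (sec[j] >= pri[k] || j + 1 == sec_num)
			break;

	*k_out = k;
	*j_out = j;
	return 0;
}

int main(void)
{
	const uint16_t pri[] = { 64, 128, 192, 256 };	/* hypothetical tables */
	const uint16_t sec[] = { 48, 128, 224 };
	int k, j;

	if (!pick_rail_indexes(pri, 4, sec, 3, 150, &k, &j))
		printf("pri index %d, sec index %d\n", k, j);
	return 0;
}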
/* Generate a set of bandwidth votes for the list of BCMs */
static void tcs_cmd_data(struct bcm *bcms, int count,
u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode)
{
int i;
for (i = 0; i < count; i++) {
bool valid = true;
bool commit = false;
u64 avg, peak, x, y;
if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
commit = true;
if (bcms[i].fixed) {
if (!ab && !ib)
data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
else
data[i] = BCM_TCS_CMD(commit, true, 0x0,
set_perfmode ? perfmode_vote : 0x0);
continue;
}
/* Multiply the bandwidth by the width of the connection */
avg = ((u64) ab) * bcms[i].width;
/* And then divide by the total width */
do_div(avg, bcms[i].buswidth);
peak = ((u64) ib) * bcms[i].width;
do_div(peak, bcms[i].buswidth);
/* Input bandwidth value is in KBps */
x = avg * 1000ULL;
do_div(x, bcms[i].unit);
/* Input bandwidth value is in KBps */
y = peak * 1000ULL;
do_div(y, bcms[i].unit);
/*
* If a bandwidth value was specified but the calculation ends
* rounding down to zero, set a minimum level
*/
if (ab && x == 0)
x = 1;
if (ib && y == 0)
y = 1;
x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);
if (!x && !y)
valid = false;
data[i] = BCM_TCS_CMD(commit, valid, x, y);
}
}
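The vote math in tcs_cmd_data() scales the KBps request by the BCM's connection width over the bus width, converts it into the BCM unit, and rounds tiny non-zero requests up to 1. A worked example with hypothetical BCM parameters:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ab = 2000000;		/* average bandwidth request, KBps */
	uint32_t width = 4;		/* connection width (hypothetical) */
	uint32_t buswidth = 16;		/* total bus width (hypothetical) */
	uint64_t unit = 1000000;	/* BCM unit from cmd-db (hypothetical) */
	uint64_t x;

	/* Scale by width/buswidth, then convert KBps into the BCM unit */
	x = (ab * width / buswidth) * 1000ULL / unit;
	if (ab && x == 0)
		x = 1;			/* non-zero request never rounds to zero */
	printf("BCM vote field = %llu\n", (unsigned long long)x);
	return 0;
}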
static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
{
int i;
if (!votes)
return;
for (i = 0; votes->cmds && i < votes->num_levels; i++)
kfree(votes->cmds[i]);
kfree(votes->cmds);
kfree(votes->addrs);
kfree(votes);
}
/* Build the votes table from the specified bandwidth levels */
static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl)
{
struct rpmh_bw_votes *votes;
bool set_perfmode;
int i;
votes = kzalloc(sizeof(*votes), GFP_KERNEL);
if (!votes)
return ERR_PTR(-ENOMEM);
votes->addrs = kcalloc(bcm_count, sizeof(*votes->addrs), GFP_KERNEL);
if (!votes->addrs) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
if (!votes->cmds) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
votes->num_cmds = bcm_count;
votes->num_levels = levels_count;
/* Get the cmd-db information for each BCM */
for (i = 0; i < bcm_count; i++) {
size_t l;
const struct bcm_data *data;
data = cmd_db_read_aux_data(bcms[i].name, &l);
votes->addrs[i] = cmd_db_read_addr(bcms[i].name);
bcms[i].unit = le32_to_cpu(data->unit);
bcms[i].width = le16_to_cpu(data->width);
bcms[i].vcd = data->vcd;
}
for (i = 0; i < bcm_count; i++) {
if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
votes->wait_bitmask |= (1 << i);
}
for (i = 0; i < levels_count; i++) {
votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
if (!votes->cmds[i]) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
set_perfmode = (i >= perfmode_lvl) ? true : false;
tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i],
perfmode_vote, set_perfmode);
}
return votes;
}
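build_rpmh_bw_votes() sets a wait bit only for the last BCM in each voting clock domain (VCD) group, so completion is awaited once per VCD. A small sketch of that rule with made-up VCD assignments:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint8_t vcd[] = { 0, 0, 1, 2, 2 };	/* hypothetical VCD per BCM */
	const int count = 5;
	uint32_t mask = 0;
	int i;

	/* Mark the last BCM of each VCD group */
	for (i = 0; i < count; i++)
		if (i == count - 1 || vcd[i] != vcd[i + 1])
			mask |= 1u << i;

	printf("wait_bitmask = 0x%x\n", (unsigned int)mask);
	return 0;
}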
/*
* setup_cx_arc_votes - Build the gmu cx voting table
* @gmu: Pointer to gmu device
* @pri_rail: Pointer to primary power rail vlvl table
* @sec_rail: Pointer to second/dependent power rail vlvl table
*
* This function initializes the cx votes for all gmu frequencies
* for gmu dcvs
*/
static int setup_cx_arc_votes(struct gen7_gmu_device *gmu,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail)
{
/* Hardcoded values of GMU CX voltage levels */
u16 gmu_cx_vlvl[MAX_CX_LEVELS];
u32 cx_votes[MAX_CX_LEVELS];
struct gen7_dcvs_table *table = &gmu->dcvs_table;
u32 *freqs = gmu->freqs;
u32 *vlvls = gmu->vlvls;
int ret, i;
gmu_cx_vlvl[0] = 0;
gmu_cx_vlvl[1] = vlvls[0];
gmu_cx_vlvl[2] = vlvls[1];
table->gmu_level_num = 3;
table->cx_votes[0].freq = 0;
table->cx_votes[1].freq = freqs[0] / 1000;
table->cx_votes[2].freq = freqs[1] / 1000;
ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
sec_rail, gmu_cx_vlvl, table->gmu_level_num);
if (!ret) {
for (i = 0; i < table->gmu_level_num; i++)
table->cx_votes[i].vote = cx_votes[i];
}
return ret;
}
static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
{
u32 i;
/*
* This means that the Gx level doesn't have a dependency on Cx level.
* Return the same value to disable cx voting at GMU.
*/
if (vlvl == 0xffffffff) {
*hlvl = vlvl;
return 0;
}
for (i = 0; i < cx_rail->num; i++) {
if (cx_rail->val[i] >= vlvl) {
*hlvl = i;
return 0;
}
}
return -EINVAL;
}
/*
* setup_gx_arc_votes - Build the gpu dcvs voting table
* @adreno_dev: Pointer to the adreno device
* @pri_rail: Pointer to primary power rail vlvl table
* @sec_rail: Pointer to second/dependent power rail vlvl table
* @cx_rail: Pointer to cx power rail vlvl table
*
* This function initializes the gx votes for all gpu frequencies
* for gpu dcvs
*/
static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
struct rpmh_arc_vals *cx_rail)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct gen7_dcvs_table *table = &gmu->dcvs_table;
u32 index;
u16 vlvl_tbl[MAX_GX_LEVELS];
u32 gx_votes[MAX_GX_LEVELS];
int ret, i;
if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) {
dev_err(device->dev,
"Defined more GPU DCVS levels than RPMh can support\n");
return -ERANGE;
}
/* Add the zero powerlevel for the perf table */
table->gpu_level_num = pwr->num_pwrlevels + 1;
memset(vlvl_tbl, 0, sizeof(vlvl_tbl));
table->gx_votes[0].freq = 0;
table->gx_votes[0].cx_vote = 0;
/* Disable cx vote in gmu dcvs table if it is not supported in DT */
if (pwr->pwrlevels[0].cx_level == 0xffffffff)
table->gx_votes[0].cx_vote = 0xffffffff;
/* GMU power levels are in ascending order */
for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) {
u32 cx_vlvl = pwr->pwrlevels[i].cx_level;
vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level;
table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000;
ret = to_cx_hlvl(cx_rail, cx_vlvl,
&table->gx_votes[index].cx_vote);
if (ret) {
dev_err(device->dev, "Unsupported cx corner: %u\n",
cx_vlvl);
return ret;
}
}
ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
sec_rail, vlvl_tbl, table->gpu_level_num);
if (!ret) {
for (i = 0; i < table->gpu_level_num; i++)
table->gx_votes[i].vote = gx_votes[i];
}
return ret;
}
static int build_dcvs_table(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
int ret;
ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
if (ret)
return ret;
ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
if (ret)
return ret;
ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
if (ret)
return ret;
ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc);
if (ret)
return ret;
return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);
}
/*
* List of Bus Control Modules (BCMs) that need to be configured for the GPU
* to access DDR. For each bus level we will generate a vote for each BCM
*/
static struct bcm gen7_ddr_bcms[] = {
{ .name = "SH0", .buswidth = 16 },
{ .name = "MC0", .buswidth = 4 },
{ .name = "ACV", .fixed = true },
};
/* Same as above, but for the CNOC BCMs */
static struct bcm gen7_cnoc_bcms[] = {
{ .name = "CN0", .buswidth = 4 },
};
static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
{
u32 i, j;
cmd->bw_level_num = ddr->num_levels;
cmd->ddr_cmds_num = ddr->num_cmds;
cmd->ddr_wait_bitmask = ddr->wait_bitmask;
for (i = 0; i < ddr->num_cmds; i++)
cmd->ddr_cmd_addrs[i] = ddr->addrs[i];
for (i = 0; i < ddr->num_levels; i++)
for (j = 0; j < ddr->num_cmds; j++)
cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j];
if (!cnoc)
return;
cmd->cnoc_cmds_num = cnoc->num_cmds;
cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;
for (i = 0; i < cnoc->num_cmds; i++)
cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i];
for (i = 0; i < cnoc->num_levels; i++)
for (j = 0; j < cnoc->num_cmds; j++)
cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j];
}
static int build_bw_table(struct adreno_device *adreno_dev)
{
struct gen7_gmu_device *gmu = to_gen7_gmu(adreno_dev);
const struct adreno_gen7_core *gen7_core = to_gen7_core(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct rpmh_bw_votes *ddr, *cnoc = NULL;
u32 perfmode_vote = gen7_core->acv_perfmode_vote;
u32 perfmode_lvl = perfmode_vote ? kgsl_pwrctrl_get_acv_perfmode_lvl(device,
gen7_core->acv_perfmode_ddr_freq) : 1;
u32 *cnoc_table;
u32 count;
int ret;
/* If the perfmode vote is not defined, use 0x8 as the default value */
if (!perfmode_vote)
perfmode_vote = BIT(3);
ddr = build_rpmh_bw_votes(gen7_ddr_bcms, ARRAY_SIZE(gen7_ddr_bcms),
pwr->ddr_table, pwr->ddr_table_count, perfmode_vote, perfmode_lvl);
if (IS_ERR(ddr))
return PTR_ERR(ddr);
cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
&count);
if (count > 0)
cnoc = build_rpmh_bw_votes(gen7_cnoc_bcms,
ARRAY_SIZE(gen7_cnoc_bcms), cnoc_table, count, 0, 0);
kfree(cnoc_table);
if (IS_ERR(cnoc)) {
free_rpmh_bw_votes(ddr);
return PTR_ERR(cnoc);
}
ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
if (ret)
return ret;
build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);
free_rpmh_bw_votes(ddr);
free_rpmh_bw_votes(cnoc);
return 0;
}
int gen7_build_rpmh_tables(struct adreno_device *adreno_dev)
{
int ret;
ret = build_dcvs_table(adreno_dev);
if (ret) {
dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n");
return ret;
}
ret = build_bw_table(adreno_dev);
if (ret)
dev_err(adreno_dev->dev.dev, "Failed to build bw table\n");
return ret;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,383 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN7_SNAPSHOT_H
#define __ADRENO_GEN7_SNAPSHOT_H
#include "adreno.h"
#include "adreno_gen7.h"
#include "kgsl_regmap.h"
#define CLUSTER_NONE 0
#define CLUSTER_FE 1
#define CLUSTER_SP_VS 2
#define CLUSTER_PC_VS 3
#define CLUSTER_GRAS 4
#define CLUSTER_SP_PS 5
#define CLUSTER_VPC_PS 6
#define CLUSTER_PS 7
#define HLSQ_STATE 0
#define HLSQ_DP 1
#define SP_TOP 2
#define USPTP 3
#define HLSQ_DP_STR 4
#define STATE_NON_CONTEXT 0
#define STATE_TOGGLE_CTXT 1
#define STATE_FORCE_CTXT_0 2
#define STATE_FORCE_CTXT_1 3
#define GEN7_DEBUGBUS_BLOCK_SIZE 0x100
/* Number of dword to dump in snapshot for CP SQE */
#define GEN7_SQE_FW_SNAPSHOT_DWORDS 5
struct gen7_sel_reg {
unsigned int host_reg;
unsigned int cd_reg;
unsigned int val;
};
struct gen7_sptp_cluster_registers {
/* cluster_id: Cluster identifier */
int cluster_id;
/* statetype: SP block state type for the cluster */
int statetype;
/* pipe_id: Pipe identifier */
int pipe_id;
/* context_id: Context identifier */
int context_id;
/* location_id: Location identifier */
int location_id;
/* regs: Pointer to the list of register pairs to read */
const u32 *regs;
/* regbase: Dword offset of the register block in the GPU register space */
unsigned int regbase;
/* offset: Internal variable used to track the crashdump state */
unsigned int offset;
};
struct gen7_shader_block {
/* statetype: Type identifier for the block */
u32 statetype;
/* size: Size of the block (in dwords) */
u32 size;
/* num_sps: The SP id to dump */
u32 num_sps;
/* num_usptps: The number of USPTPs to dump */
u32 num_usptps;
/* pipe_id: Pipe identifier for the block data */
u32 pipeid;
/* location: Location identifier for the block data */
u32 location;
/* offset: The offset in the snapshot dump */
u64 offset;
};
struct gen7_shader_block_info {
struct gen7_shader_block *block;
unsigned int sp_id;
unsigned int usptp;
u32 bank;
u64 offset;
};
struct gen7_reg_list {
const u32 *regs;
const struct gen7_sel_reg *sel;
u64 offset;
};
struct gen7_cp_indexed_reg {
u32 addr;
u32 data;
u32 size;
};
struct gen7_cluster_registers {
/* cluster_id: Cluster identifier */
int cluster_id;
/* pipe_id: Pipe Identifier */
int pipe_id;
/* context_id: One of the STATE_ values that identifies the context to dump */
int context_id;
/* regs: Pointer to an array of register pairs */
const u32 *regs;
/* sel: Pointer to a selector register to write before reading */
const struct gen7_sel_reg *sel;
/* offset: Internal variable to track the state of the crashdump */
unsigned int offset;
};
struct gen7_snapshot_block_list {
/* pre_crashdumper_regs : Registers which need to be dumped before CD runs */
const u32 *pre_crashdumper_regs;
/* debugbus_blocks : List of debugbus blocks */
const u32 *debugbus_blocks;
/* debugbus_blocks_len : Length of the debugbus list */
size_t debugbus_blocks_len;
/* gbif_debugbus_blocks : List of GBIF debugbus blocks */
const u32 *gbif_debugbus_blocks;
/* gbif_debugbus_blocks_len : Length of GBIF debugbus list */
size_t gbif_debugbus_blocks_len;
/* cx_debugbus_blocks : List of CX debugbus blocks */
const u32 *cx_debugbus_blocks;
/* cx_debugbus_blocks_len : Length of the CX debugbus list */
size_t cx_debugbus_blocks_len;
/* external_core_regs : List of external core registers */
const u32 **external_core_regs;
/* num_external_core_regs : length of external core registers list */
size_t num_external_core_regs;
/* gmu_regs : List of GMU registers */
const u32 *gmu_regs;
/* gmu_gx_regs : List of GMU GX registers */
const u32 *gmu_gx_regs;
/* rscc_regs : List of RSCC registers */
const u32 *rscc_regs;
/* reg_list : List of GPU internal registers */
struct gen7_reg_list *reg_list;
/* cx_misc_regs : List of cx_misc registers */
const u32 *cx_misc_regs;
/* shader_blocks : List of GPU shader memory */
struct gen7_shader_block *shader_blocks;
/* num_shader_blocks : Length of the shader memory list */
size_t num_shader_blocks;
/* cluster_registers : List of GPU cluster registers */
struct gen7_cluster_registers *clusters;
/* num_clusters : Length of GPU cluster registers list */
size_t num_clusters;
/* spstp_cluster_registers : List of GPU SPTP cluster registers */
struct gen7_sptp_cluster_registers *sptp_clusters;
/* num_sptp_clusters : Length of GPU SPTP cluster registers list */
size_t num_sptp_clusters;
/* post_crashdumper_regs : Registers which need to be dumped after CD runs */
const u32 *post_crashdumper_regs;
/* index_registers : List of index_registers */
struct gen7_cp_indexed_reg *index_registers;
/* index_registers_len : Length of the index registers */
size_t index_registers_len;
};
struct gen7_trace_buffer_info {
u16 dbgc_ctrl;
u16 segment;
u16 granularity;
u16 ping_blk[TRACE_BUF_NUM_SIG];
u16 ping_idx[TRACE_BUF_NUM_SIG];
};
enum gen7_debugbus_ids {
DEBUGBUS_CP_0_0 = 1,
DEBUGBUS_CP_0_1 = 2,
DEBUGBUS_RBBM = 3,
DEBUGBUS_GBIF_GX = 5,
DEBUGBUS_GBIF_CX = 6,
DEBUGBUS_HLSQ = 7,
DEBUGBUS_UCHE_0 = 9,
DEBUGBUS_UCHE_1 = 10,
DEBUGBUS_TESS_BR = 13,
DEBUGBUS_TESS_BV = 14,
DEBUGBUS_PC_BR = 17,
DEBUGBUS_PC_BV = 18,
DEBUGBUS_VFDP_BR = 21,
DEBUGBUS_VFDP_BV = 22,
DEBUGBUS_VPC_BR = 25,
DEBUGBUS_VPC_BV = 26,
DEBUGBUS_TSE_BR = 29,
DEBUGBUS_TSE_BV = 30,
DEBUGBUS_RAS_BR = 33,
DEBUGBUS_RAS_BV = 34,
DEBUGBUS_VSC = 37,
DEBUGBUS_COM_0 = 39,
DEBUGBUS_LRZ_BR = 43,
DEBUGBUS_LRZ_BV = 44,
DEBUGBUS_UFC_0 = 47,
DEBUGBUS_UFC_1 = 48,
DEBUGBUS_GMU_GX = 55,
DEBUGBUS_DBGC = 59,
DEBUGBUS_CX = 60,
DEBUGBUS_GMU_CX = 61,
DEBUGBUS_GPC_BR = 62,
DEBUGBUS_GPC_BV = 63,
DEBUGBUS_LARC = 66,
DEBUGBUS_HLSQ_SPTP = 68,
DEBUGBUS_RB_0 = 70,
DEBUGBUS_RB_1 = 71,
DEBUGBUS_RB_2 = 72,
DEBUGBUS_RB_3 = 73,
DEBUGBUS_RB_4 = 74,
DEBUGBUS_RB_5 = 75,
DEBUGBUS_UCHE_WRAPPER = 102,
DEBUGBUS_CCU_0 = 106,
DEBUGBUS_CCU_1 = 107,
DEBUGBUS_CCU_2 = 108,
DEBUGBUS_CCU_3 = 109,
DEBUGBUS_CCU_4 = 110,
DEBUGBUS_CCU_5 = 111,
DEBUGBUS_VFD_BR_0 = 138,
DEBUGBUS_VFD_BR_1 = 139,
DEBUGBUS_VFD_BR_2 = 140,
DEBUGBUS_VFD_BR_3 = 141,
DEBUGBUS_VFD_BR_4 = 142,
DEBUGBUS_VFD_BR_5 = 143,
DEBUGBUS_VFD_BR_6 = 144,
DEBUGBUS_VFD_BR_7 = 145,
DEBUGBUS_VFD_BV_0 = 202,
DEBUGBUS_VFD_BV_1 = 203,
DEBUGBUS_VFD_BV_2 = 204,
DEBUGBUS_VFD_BV_3 = 205,
DEBUGBUS_USP_0 = 234,
DEBUGBUS_USP_1 = 235,
DEBUGBUS_USP_2 = 236,
DEBUGBUS_USP_3 = 237,
DEBUGBUS_USP_4 = 238,
DEBUGBUS_USP_5 = 239,
DEBUGBUS_TP_0 = 266,
DEBUGBUS_TP_1 = 267,
DEBUGBUS_TP_2 = 268,
DEBUGBUS_TP_3 = 269,
DEBUGBUS_TP_4 = 270,
DEBUGBUS_TP_5 = 271,
DEBUGBUS_TP_6 = 272,
DEBUGBUS_TP_7 = 273,
DEBUGBUS_TP_8 = 274,
DEBUGBUS_TP_9 = 275,
DEBUGBUS_TP_10 = 276,
DEBUGBUS_TP_11 = 277,
DEBUGBUS_USPTP_0 = 330,
DEBUGBUS_USPTP_1 = 331,
DEBUGBUS_USPTP_2 = 332,
DEBUGBUS_USPTP_3 = 333,
DEBUGBUS_USPTP_4 = 334,
DEBUGBUS_USPTP_5 = 335,
DEBUGBUS_USPTP_6 = 336,
DEBUGBUS_USPTP_7 = 337,
DEBUGBUS_USPTP_8 = 338,
DEBUGBUS_USPTP_9 = 339,
DEBUGBUS_USPTP_10 = 340,
DEBUGBUS_USPTP_11 = 341,
DEBUGBUS_CCHE_0 = 396,
DEBUGBUS_CCHE_1 = 397,
DEBUGBUS_CCHE_2 = 398,
DEBUGBUS_VPC_DSTR_0 = 408,
DEBUGBUS_VPC_DSTR_1 = 409,
DEBUGBUS_VPC_DSTR_2 = 410,
DEBUGBUS_HLSQ_DP_STR_0 = 411,
DEBUGBUS_HLSQ_DP_STR_1 = 412,
DEBUGBUS_HLSQ_DP_STR_2 = 413,
DEBUGBUS_HLSQ_DP_STR_3 = 414,
DEBUGBUS_HLSQ_DP_STR_4 = 415,
DEBUGBUS_HLSQ_DP_STR_5 = 416,
DEBUGBUS_UFC_DSTR_0 = 443,
DEBUGBUS_UFC_DSTR_1 = 444,
DEBUGBUS_UFC_DSTR_2 = 445,
DEBUGBUS_CGC_SUBCORE = 446,
DEBUGBUS_CGC_CORE = 447,
};
static const u32 gen7_gbif_debugbus_blocks[] = {
DEBUGBUS_GBIF_CX,
DEBUGBUS_GBIF_GX,
};
static const u32 gen7_cx_dbgc_debugbus_blocks[] = {
DEBUGBUS_GMU_CX,
DEBUGBUS_CX,
DEBUGBUS_GBIF_CX,
};
enum gen7_statetype_ids {
TP0_NCTX_REG = 0,
TP0_CTX0_3D_CVS_REG = 1,
TP0_CTX0_3D_CPS_REG = 2,
TP0_CTX1_3D_CVS_REG = 3,
TP0_CTX1_3D_CPS_REG = 4,
TP0_CTX2_3D_CPS_REG = 5,
TP0_CTX3_3D_CPS_REG = 6,
TP0_TMO_DATA = 9,
TP0_SMO_DATA = 10,
TP0_MIPMAP_BASE_DATA = 11,
SP_NCTX_REG = 32,
SP_CTX0_3D_CVS_REG = 33,
SP_CTX0_3D_CPS_REG = 34,
SP_CTX1_3D_CVS_REG = 35,
SP_CTX1_3D_CPS_REG = 36,
SP_CTX2_3D_CPS_REG = 37,
SP_CTX3_3D_CPS_REG = 38,
SP_INST_DATA = 39,
SP_INST_DATA_1 = 40,
SP_LB_0_DATA = 41,
SP_LB_1_DATA = 42,
SP_LB_2_DATA = 43,
SP_LB_3_DATA = 44,
SP_LB_4_DATA = 45,
SP_LB_5_DATA = 46,
SP_LB_6_DATA = 47,
SP_LB_7_DATA = 48,
SP_CB_RAM = 49,
SP_LB_13_DATA = 50,
SP_LB_14_DATA = 51,
SP_INST_TAG = 52,
SP_INST_DATA_2 = 53,
SP_TMO_TAG = 54,
SP_SMO_TAG = 55,
SP_STATE_DATA = 56,
SP_HWAVE_RAM = 57,
SP_L0_INST_BUF = 58,
SP_LB_8_DATA = 59,
SP_LB_9_DATA = 60,
SP_LB_10_DATA = 61,
SP_LB_11_DATA = 62,
SP_LB_12_DATA = 63,
HLSQ_DATAPATH_DSTR_META = 64,
HLSQ_L2STC_TAG_RAM = 67,
HLSQ_L2STC_INFO_CMD = 68,
HLSQ_CVS_BE_CTXT_BUF_RAM_TAG = 69,
HLSQ_CPS_BE_CTXT_BUF_RAM_TAG = 70,
HLSQ_GFX_CVS_BE_CTXT_BUF_RAM = 71,
HLSQ_GFX_CPS_BE_CTXT_BUF_RAM = 72,
HLSQ_CHUNK_CVS_RAM = 73,
HLSQ_CHUNK_CPS_RAM = 74,
HLSQ_CHUNK_CVS_RAM_TAG = 75,
HLSQ_CHUNK_CPS_RAM_TAG = 76,
HLSQ_ICB_CVS_CB_BASE_TAG = 77,
HLSQ_ICB_CPS_CB_BASE_TAG = 78,
HLSQ_CVS_MISC_RAM = 79,
HLSQ_CPS_MISC_RAM = 80,
HLSQ_CPS_MISC_RAM_1 = 81,
HLSQ_INST_RAM = 82,
HLSQ_GFX_CVS_CONST_RAM = 83,
HLSQ_GFX_CPS_CONST_RAM = 84,
HLSQ_CVS_MISC_RAM_TAG = 85,
HLSQ_CPS_MISC_RAM_TAG = 86,
HLSQ_INST_RAM_TAG = 87,
HLSQ_GFX_CVS_CONST_RAM_TAG = 88,
HLSQ_GFX_CPS_CONST_RAM_TAG = 89,
HLSQ_GFX_LOCAL_MISC_RAM = 90,
HLSQ_GFX_LOCAL_MISC_RAM_TAG = 91,
HLSQ_INST_RAM_1 = 92,
HLSQ_STPROC_META = 93,
HLSQ_BV_BE_META = 94,
HLSQ_INST_RAM_2 = 95,
HLSQ_DATAPATH_META = 96,
HLSQ_FRONTEND_META = 97,
HLSQ_INDIRECT_META = 98,
HLSQ_BACKEND_META = 99,
};
static struct gen7_cp_indexed_reg gen7_cp_indexed_reg_list[] = {
{ GEN7_CP_SQE_STAT_ADDR, GEN7_CP_SQE_STAT_DATA, 0x40},
{ GEN7_CP_DRAW_STATE_ADDR, GEN7_CP_DRAW_STATE_DATA, 0x100},
{ GEN7_CP_SQE_UCODE_DBG_ADDR, GEN7_CP_SQE_UCODE_DBG_DATA, 0x8000},
{ GEN7_CP_BV_SQE_STAT_ADDR, GEN7_CP_BV_SQE_STAT_DATA, 0x40},
{ GEN7_CP_BV_DRAW_STATE_ADDR, GEN7_CP_BV_DRAW_STATE_DATA, 0x100},
{ GEN7_CP_BV_SQE_UCODE_DBG_ADDR, GEN7_CP_BV_SQE_UCODE_DBG_DATA, 0x8000},
{ GEN7_CP_SQE_AC_STAT_ADDR, GEN7_CP_SQE_AC_STAT_DATA, 0x40},
{ GEN7_CP_LPAC_DRAW_STATE_ADDR, GEN7_CP_LPAC_DRAW_STATE_DATA, 0x100},
{ GEN7_CP_SQE_AC_UCODE_DBG_ADDR, GEN7_CP_SQE_AC_UCODE_DBG_DATA, 0x8000},
{ GEN7_CP_LPAC_FIFO_DBG_ADDR, GEN7_CP_LPAC_FIFO_DBG_DATA, 0x40},
};
#endif /*_ADRENO_GEN7_SNAPSHOT_H */

File diff suppressed because it is too large

View File

@ -0,0 +1,615 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_GEN8_H_
#define _ADRENO_GEN8_H_
#include <linux/delay.h>
#include "adreno_gen8_gmu.h"
#include "gen8_reg.h"
/* Forward struct declaration */
struct gen8_snapshot_block_list;
extern const struct adreno_power_ops gen8_gmu_power_ops;
extern const struct adreno_power_ops gen8_hwsched_power_ops;
extern const struct adreno_perfcounters adreno_gen8_perfcounters;
struct gen8_gpudev {
struct adreno_gpudev base;
int (*hfi_probe)(struct adreno_device *adreno_dev);
void (*hfi_remove)(struct adreno_device *adreno_dev);
void (*handle_watchdog)(struct adreno_device *adreno_dev);
};
extern const struct gen8_gpudev adreno_gen8_gmu_gpudev;
extern const struct gen8_gpudev adreno_gen8_hwsched_gpudev;
struct gen8_nonctxt_overrides {
/** offset: Dword offset of the register to write */
u32 offset;
/** pipelines: Pipelines to write */
u32 pipelines;
/** val: Value to be written to the register */
u32 val;
/** set: True for user override request */
bool set;
/**
* list_type: 0 if the register is already present in any existing static pwrup list
*            1 if the register fits into the IFPC-only static pwrup list
*            2 if the register fits into the IFPC + preemption static list
*            3 if the register fits into the external powerup list
*/
u32 list_type;
};
/**
* struct gen8_device - Container for the gen8_device
*/
struct gen8_device {
/** @gmu: Container for the gen8 GMU device */
struct gen8_gmu_device gmu;
/** @adreno_dev: Container for the generic adreno device */
struct adreno_device adreno_dev;
/** @aperture: The last value that the host aperture register was programmed to */
u32 aperture;
/** @ext_pwrup_list_len: External pwrup reglist length */
u16 ext_pwrup_list_len;
/**
* @nc_overrides: Non-context register overrides whitelist; if defined,
* it must be null terminated
*/
struct gen8_nonctxt_overrides *nc_overrides;
/** @nc_mutex: Mutex to protect nc_overrides updates */
struct mutex nc_mutex;
/** @nc_overrides_enabled: Set through debugfs path when any override is enabled */
bool nc_overrides_enabled;
};
/**
* struct gen8_pwrup_extlist - container for a powerup external reglist
*/
struct gen8_pwrup_extlist {
/** offset: Dword offset of the register to write */
u32 offset;
/** pipelines: pipelines to write */
u32 pipelines;
};
/**
* struct gen8_protected_regs - container for a protect register span
*/
struct gen8_protected_regs {
/** @reg: Physical protected mode register to write to */
u32 reg;
/** @start: Dword offset of the starting register in the range */
u32 start;
/** @end: Dword offset of the ending register in the range (inclusive) */
u32 end;
/**
* @noaccess: 1 if the register should not be accessible from
* userspace, 0 if it can be read (but not written)
*/
u32 noaccess;
};
/**
* struct gen8_nonctxt_regs - Container for non context registers span
*/
struct gen8_nonctxt_regs {
/** @offset: Dword offset of the register to write */
u32 offset;
/** @val: Value to write */
u32 val;
/** @pipelines: pipelines to write */
u32 pipelines;
};
/**
* struct adreno_gen8_core - gen8 specific GPU core definitions
*/
struct adreno_gen8_core {
/** @base: Container for the generic GPU definitions */
struct adreno_gpu_core base;
/** @gmu_fw_version: Minimum firmware version required to support this core */
u32 gmu_fw_version;
/** @sqefw_name: Name of the SQE microcode file */
const char *sqefw_name;
/** @aqefw_name: Name of the AQE microcode file */
const char *aqefw_name;
/** @gmufw_name: Name of the GMU firmware file */
const char *gmufw_name;
/** @zap_name: Name of the CPZ zap file */
const char *zap_name;
/** @ao_hwcg: List of registers and values to write for HWCG in AO block */
const struct kgsl_regmap_list *ao_hwcg;
/** @ao_hwcg_count: Number of registers in @ao_hwcg */
u32 ao_hwcg_count;
/** @gbif: List of registers and values to write for GBIF */
const struct kgsl_regmap_list *gbif;
/** @gbif_count: Number of registers in @gbif */
u32 gbif_count;
/** @hang_detect_cycles: Hang detect counter timeout value */
u32 hang_detect_cycles;
/** @protected_regs: Array of protected registers for the target */
const struct gen8_protected_regs *protected_regs;
/** @nonctxt_regs: Array of non context register list */
const struct gen8_nonctxt_regs *nonctxt_regs;
/** @ctxt_record_size: Size of the preemption record in bytes */
u64 ctxt_record_size;
/** @highest_bank_bit: Highest bank bit value */
u32 highest_bank_bit;
/** @gen8_snapshot_block_list: Device-specific blocks dumped in the snapshot */
const struct gen8_snapshot_block_list *gen8_snapshot_block_list;
/** @gmu_hub_clk_freq: Gmu hub interface clock frequency */
u64 gmu_hub_clk_freq;
/**
* @bcl_data: bit 0 contains response type for bcl alarms and bits 1:21 controls sid vals
* to configure throttle levels for bcl alarm levels 0-2. If sid vals are not set,
* gmu fw sets default throttle levels.
*/
u32 bcl_data;
/** @preempt_level: Preemption level valid ranges [0 to 2] */
u32 preempt_level;
/** @qos_value: GPU qos value to set for each RB. */
const u32 *qos_value;
/**
* @acv_perfmode_ddr_freq: Vote perfmode when DDR frequency >= acv_perfmode_ddr_freq.
* If not specified, vote perfmode for highest DDR level only.
*/
u32 acv_perfmode_ddr_freq;
/** @rt_bus_hint: IB level hint for real time clients i.e. RB-0 */
const u32 rt_bus_hint;
/** @fast_bus_hint: Whether or not to increase IB vote on high ddr stall */
bool fast_bus_hint;
/** @noc_timeout_us: GPU config NOC port timeout in usec */
u32 noc_timeout_us;
};
/**
* struct gen8_cp_preemption_record - CP context record for
* preemption.
* @magic: (00) Value at this offset must be equal to
* GEN8_CP_CTXRECORD_MAGIC_REF.
* @info: (04) Type of record. Written non-zero (usually) by CP.
* We must set it to zero for all ringbuffers.
* @errno: (08) Error code. Initialize this to GEN8_CP_CTXRECORD_ERROR_NONE.
* CP will update to another value if a preemption error occurs.
* @data: (12) DATA field in YIELD and SET_MARKER packets.
* Written by CP when switching out. Not used on switch-in. Initialized to 0.
* @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
* @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
* @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
* @_pad28: (28) Reserved/padding.
* @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
* @rbase: (40) RB_BASE_LO|HI saved and restored.
* @counter: (48) Pointer to preemption counter.
* @bv_rptr_addr: (56) BV_RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
*/
struct gen8_cp_preemption_record {
u32 magic;
u32 info;
u32 errno;
u32 data;
u32 cntl;
u32 rptr;
u32 wptr;
u32 _pad28;
u64 rptr_addr;
u64 rbase;
u64 counter;
u64 bv_rptr_addr;
};
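The byte offsets quoted in the comment above (magic at 0, rptr_addr at 32, bv_rptr_addr at 56) follow directly from the field sizes: eight u32 header fields, then naturally aligned u64s. A stand-alone mirror of the layout that checks this with offsetof (errno is renamed to avoid the libc macro):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct preempt_record_sketch {
	uint32_t magic;
	uint32_t info;
	uint32_t errno_;	/* "errno" in the driver struct */
	uint32_t data;
	uint32_t cntl;
	uint32_t rptr;
	uint32_t wptr;
	uint32_t _pad28;
	uint64_t rptr_addr;
	uint64_t rbase;
	uint64_t counter;
	uint64_t bv_rptr_addr;
};

int main(void)
{
	printf("rptr_addr at %zu, bv_rptr_addr at %zu\n",
	       offsetof(struct preempt_record_sketch, rptr_addr),
	       offsetof(struct preempt_record_sketch, bv_rptr_addr));
	return 0;
}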
/**
* struct gen8_cp_smmu_info - CP preemption SMMU info.
* @magic: (00) The value at this offset must be equal to
* GEN8_CP_SMMU_INFO_MAGIC_REF
* @_pad4: (04) Reserved/padding
* @ttbr0: (08) Base address of the page table for the incoming context
* @asid: (16) Address Space IDentifier (ASID) of the incoming context
* @context_idr: (20) Context Identification Register value
* @context_bank: (24) Which Context Bank in SMMU to update
*/
struct gen8_cp_smmu_info {
u32 magic;
u32 _pad4;
u64 ttbr0;
u32 asid;
u32 context_idr;
u32 context_bank;
};
#define GEN8_CP_SMMU_INFO_MAGIC_REF 0x241350d5UL
#define GEN8_CP_CTXRECORD_MAGIC_REF 0xae399d6eUL
/* Size of each CP preemption record */
#define GEN8_CP_CTXRECORD_SIZE_IN_BYTES (13536 * SZ_1K)
/* Size of preemption record to be dumped in snapshot */
#define GEN8_SNAPSHOT_CTXRECORD_SIZE_IN_BYTES (128 * 1024)
/* Size of the user context record block (in bytes) */
#define GEN8_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024)
/* Size of the performance counter save/restore block (in bytes) */
#define GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE (4 * 1024)
#define GEN8_CP_RB_CNTL_DEFAULT \
(FIELD_PREP(GENMASK(7, 0), ilog2(KGSL_RB_DWORDS >> 1)) | \
FIELD_PREP(GENMASK(12, 8), ilog2(4)))
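GEN8_CP_RB_CNTL_DEFAULT encodes log2 of the ring size in half-dwords into bits [7:0] and log2 of the block size (4) into bits [12:8]. A sketch of the same encoding with plain shifts; the 32K-dword ring size here is only illustrative, not the driver's actual KGSL_RB_DWORDS:

#include <stdio.h>
#include <stdint.h>

/* Integer log2 for powers of two and other values (floor) */
static uint32_t ilog2_u32(uint32_t v)
{
	uint32_t r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	uint32_t rb_dwords = 32768;	/* hypothetical ring size in dwords */
	uint32_t cntl = (ilog2_u32(rb_dwords >> 1) & 0xff) |	/* bits [7:0] */
			((ilog2_u32(4) & 0x1f) << 8);		/* bits [12:8] */

	printf("RB_CNTL = 0x%x\n", (unsigned int)cntl);
	return 0;
}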
/* Size of the CP_INIT pm4 stream in dwords */
#define GEN8_CP_INIT_DWORDS 10
#define GEN8_INT_MASK \
((1 << GEN8_INT_AHBERROR) | \
(1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) | \
(1 << GEN8_INT_GPCERROR) | \
(1 << GEN8_INT_SWINTERRUPT) | \
(1 << GEN8_INT_HWERROR) | \
(1 << GEN8_INT_PM4CPINTERRUPT) | \
(1 << GEN8_INT_RB_DONE_TS) | \
(1 << GEN8_INT_CACHE_CLEAN_TS) | \
(1 << GEN8_INT_ATBBUSOVERFLOW) | \
(1 << GEN8_INT_HANGDETECTINTERRUPT) | \
(1 << GEN8_INT_OUTOFBOUNDACCESS) | \
(1 << GEN8_INT_UCHETRAPINTERRUPT) | \
(1 << GEN8_INT_TSBWRITEERROR) | \
(1 << GEN8_INT_SWFUSEVIOLATION))
#define GEN8_HWSCHED_INT_MASK \
((1 << GEN8_INT_AHBERROR) | \
(1 << GEN8_INT_ATBASYNCFIFOOVERFLOW) | \
(1 << GEN8_INT_ATBBUSOVERFLOW) | \
(1 << GEN8_INT_OUTOFBOUNDACCESS) | \
(1 << GEN8_INT_UCHETRAPINTERRUPT) | \
(1 << GEN8_INT_TSBWRITEERROR))
/* GEN8 CX MISC interrupt bits */
#define GEN8_CX_MISC_GPU_CC_IRQ 31
#define GEN8_CX_MISC_INT_MASK BIT(GEN8_CX_MISC_GPU_CC_IRQ)
/**
* to_gen8_core - return the gen8 specific GPU core struct
* @adreno_dev: An Adreno GPU device handle
*
* Returns:
* A pointer to the gen8 specific GPU core struct
*/
static inline const struct adreno_gen8_core *
to_gen8_core(struct adreno_device *adreno_dev)
{
const struct adreno_gpu_core *core = adreno_dev->gpucore;
return container_of(core, struct adreno_gen8_core, base);
}
/* Preemption functions */
void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic);
void gen8_preemption_schedule(struct adreno_device *adreno_dev);
void gen8_preemption_start(struct adreno_device *adreno_dev);
int gen8_preemption_init(struct adreno_device *adreno_dev);
u32 gen8_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
u32 *cmds);
u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds);
u32 gen8_set_marker(u32 *cmds, enum adreno_cp_marker_type type);
void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit);
int gen8_preemption_context_init(struct kgsl_context *context);
void gen8_preemption_context_destroy(struct kgsl_context *context);
void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev);
void gen8_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
void gen8_crashdump_init(struct adreno_device *adreno_dev);
/**
* gen8_snapshot_external_core_regs - Dump external registers into snapshot
* @device: Pointer to KGSL device
* @snapshot: Pointer to the snapshot
*
* Dump external core registers like GPUCC, CPR into GPU snapshot.
*/
void gen8_snapshot_external_core_regs(struct kgsl_device *device,
struct kgsl_snapshot *snapshot);
/**
* gen8_enable_ahb_timeout_detection - Program AHB control registers
* @adreno_dev: An Adreno GPU handle
*
* Program AHB control registers to enable AHB timeout detection.
*
*/
void gen8_enable_ahb_timeout_detection(struct adreno_device *adreno_dev);
/**
* gen8_start - Program gen8 registers
* @adreno_dev: An Adreno GPU handle
*
* This function does all gen8 register programming every
* time we boot the gpu
*
* Return: 0 on success or negative on failure
*/
int gen8_start(struct adreno_device *adreno_dev);
/**
* gen8_init - Initialize gen8 resources
* @adreno_dev: An Adreno GPU handle
*
* This function does gen8 specific one time initialization
* and is invoked when the very first client opens a
* kgsl instance
*
* Return: Zero on success and negative error on failure
*/
int gen8_init(struct adreno_device *adreno_dev);
/**
* gen8_cx_timer_init - Initialize the CX timer on Gen8 devices
* @adreno_dev: Pointer to the adreno device
*
* Synchronize the GPU CX timer (if we have one) with the CPU timer
*/
void gen8_cx_timer_init(struct adreno_device *adreno_dev);
/**
* gen8_get_gpu_feature_info - Get hardware supported feature info
* @adreno_dev: Pointer to the adreno device
*
* Get HW supported feature info and update software feature configuration
*/
void gen8_get_gpu_feature_info(struct adreno_device *adreno_dev);
/**
* gen8_rb_start - Gen8 specific ringbuffer setup
* @adreno_dev: An Adreno GPU handle
*
* This function does gen8 specific ringbuffer setup and
* attempts to submit CP INIT and bring GPU out of secure mode
*
* Return: Zero on success and negative error on failure
*/
int gen8_rb_start(struct adreno_device *adreno_dev);
/**
* gen8_microcode_read - Get the cp microcode from the filesystem
* @adreno_dev: An Adreno GPU handle
*
* This function gets the firmware from the filesystem and sets up
* the microcode global buffer
*
* Return: Zero on success and negative error on failure
*/
int gen8_microcode_read(struct adreno_device *adreno_dev);
/**
* gen8_probe_common - Probe common gen8 resources
* @pdev: Pointer to the platform device
* @adreno_dev: Pointer to the adreno device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore structure
*
* This function sets up the gen8 resources common across all
* gen8 targets
*/
int gen8_probe_common(struct platform_device *pdev,
struct adreno_device *adreno_dev, u32 chipid,
const struct adreno_gpu_core *gpucore);
/**
* gen8_hw_isidle - Check whether gen8 gpu is idle or not
* @adreno_dev: An Adreno GPU handle
*
* Return: True if gpu is idle, otherwise false
*/
bool gen8_hw_isidle(struct adreno_device *adreno_dev);
/**
* gen8_spin_idle_debug - Debug logging used when gpu fails to idle
* @adreno_dev: An Adreno GPU handle
*
* This function logs interesting registers and triggers a snapshot
*/
void gen8_spin_idle_debug(struct adreno_device *adreno_dev,
const char *str);
/**
* gen8_perfcounter_update - Update the IFPC perfcounter list
* @adreno_dev: An Adreno GPU handle
* @reg: Perfcounter reg struct to add/remove to the list
* @update_reg: true if the perfcounter needs to be programmed by the CPU
* @pipe: pipe id for CP aperture control
* @flags: Flags set for requested perfcounter group
*
* Return: 0 on success or -EBUSY if the lock couldn't be taken
*/
int gen8_perfcounter_update(struct adreno_device *adreno_dev,
struct adreno_perfcount_register *reg, bool update_reg, u32 pipe,
unsigned long flags);
/**
* gen8_ringbuffer_init - Initialize the ringbuffers
* @adreno_dev: An Adreno GPU handle
*
* Initialize the ringbuffer(s) for gen8.
* Return: 0 on success or negative on failure
*/
int gen8_ringbuffer_init(struct adreno_device *adreno_dev);
/**
* gen8_ringbuffer_submitcmd - Submit a user command to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @cmdobj: Pointer to a user command object
* @flags: Internal submit flags
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time);
/**
* gen8_ringbuffer_submit - Submit a command to the ringbuffer
* @rb: Ringbuffer pointer
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time);
/**
* gen8_fenced_write - Write to a fenced register
* @adreno_dev: An Adreno GPU handle
* @offset: Register offset
* @value: Value to write
* @mask: Expected FENCE_STATUS for successful write
*
* Return: 0 on success or negative on failure
*/
int gen8_fenced_write(struct adreno_device *adreno_dev, u32 offset,
u32 value, u32 mask);
/**
* gen8_ringbuffer_addcmds - Wrap and submit commands to the ringbuffer
* @adreno_dev: An Adreno GPU handle
* @rb: Ringbuffer pointer
* @drawctxt: Draw context submitting the commands
* @flags: Submission flags
* @in: Input buffer to write to ringbuffer
* @dwords: Dword length of @in
* @timestamp: Draw context timestamp for the submission
* @time: Optional pointer to a adreno_submit_time container
*
* Return: 0 on success or negative on failure
*/
int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time);
/**
* gen8_cp_init_cmds - Create the CP_INIT commands
* @adreno_dev: An Adreno GPU handle
* @cmd: Buffer to write the CP_INIT commands into
*/
void gen8_cp_init_cmds(struct adreno_device *adreno_dev, u32 *cmds);
/**
* gen8_gmu_hfi_probe - Probe Gen8 HFI specific data
* @adreno_dev: An Adreno GPU handle
*
* Return: 0 on success or negative on failure
*/
int gen8_gmu_hfi_probe(struct adreno_device *adreno_dev);
static inline const struct gen8_gpudev *
to_gen8_gpudev(const struct adreno_gpudev *gpudev)
{
return container_of(gpudev, struct gen8_gpudev, base);
}
/**
* gen8_reset_preempt_records - Reset the preemption buffers
* @adreno_dev: Handle to the adreno device
*
* Reset the preemption records at the time of hard reset
*/
void gen8_reset_preempt_records(struct adreno_device *adreno_dev);
/**
* gen8_rdpm_mx_freq_update - Update the mx frequency
* @gmu: An Adreno GMU handle
* @freq: Frequency in KHz
*
* This function communicates GPU mx frequency (in MHz) changes to rdpm.
*/
void gen8_rdpm_mx_freq_update(struct gen8_gmu_device *gmu, u32 freq);
/**
* gen8_rdpm_cx_freq_update - Update the cx frequency
* @gmu: An Adreno GMU handle
* @freq: Frequency in KHz
*
* This function communicates GPU cx frequency (in MHz) changes to rdpm.
*/
void gen8_rdpm_cx_freq_update(struct gen8_gmu_device *gmu, u32 freq);
/**
* gen8_scm_gpu_init_cx_regs - Program gpu regs for feature support
* @adreno_dev: Handle to the adreno device
*
* Program gpu regs for feature support. The scm call for this
* is available from kernel version 6.0 onwards.
*
* Return: 0 on success or negative on failure
*/
int gen8_scm_gpu_init_cx_regs(struct adreno_device *adreno_dev);
/**
* gen8_legacy_snapshot_registers - Dump registers for GPU/GMU
* @device: Handle to the KGSL device
* @buf: Target buffer to copy the data
* @remain: Buffer size remaining for dump
* @priv: Private data to dump the registers
*
* Return: Size of the section
*/
size_t gen8_legacy_snapshot_registers(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv);
/**
* gen8_regread64_aperture - Read 64 bit register values
* @device: Handle to the KGSL device
* @offsetwords_lo: Lower 32 bit address to read
* @offsetwords_hi: Higher 32 bit address to read
* @value: The value of register at offsetwords
* @pipe: Pipe for which the register is to be read
* @slice_id: Slice for which the register is to be read
* @use_slice_id: Set if the value to be read is from a sliced register
*
* This function reads the 64 bit value for registers
*/
void gen8_regread64_aperture(struct kgsl_device *device,
u32 offsetwords_lo, u32 offsetwords_hi, u64 *value, u32 pipe,
u32 slice_id, u32 use_slice_id);
/**
* gen8_regread_aperture - Read 32 bit register values
* @device: Handle to the KGSL device
* @offsetwords: 32 bit address to read
* @value: The value of register at offsetwords
* @pipe: Pipe for which the register is to be read
* @slice_id: Slice for which the register is to be read
* @use_slice_id: Set if the value to be read is from a sliced register
*
* This function reads the 32 bit value for registers
*/
void gen8_regread_aperture(struct kgsl_device *device,
u32 offsetwords, u32 *value, u32 pipe, u32 slice_id, u32 use_slice_id);
/**
* gen8_host_aperture_set - Program CP aperture register
* @adreno_dev: Handle to the adreno device
* @pipe_id: Pipe for which the register is to be set
* @slice_id: Slice for which the register is to be set
* @use_slice_id: Set if the value to be read is from a sliced register
*
* This function programs CP aperture register
*/
void gen8_host_aperture_set(struct adreno_device *adreno_dev, u32 pipe_id,
u32 slice_id, u32 use_slice_id);
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,505 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN8_GMU_H
#define __ADRENO_GEN8_GMU_H
#include <linux/soc/qcom/qcom_aoss.h>
#include "adreno_gen8_hfi.h"
#include "kgsl_gmu_core.h"
struct gen8_dcvs_table {
u32 gpu_level_num;
u32 gmu_level_num;
struct opp_gx_desc gx_votes[MAX_GX_LEVELS];
struct opp_desc cx_votes[MAX_CX_LEVELS];
};
/**
* struct gen8_gmu_device - GMU device structure
* @ver: GMU Version information
* @irq: GMU interrupt number
* @fw_image: GMU FW image
* @hfi_mem: pointer to HFI shared memory
* @dump_mem: pointer to GMU debug dump memory
* @gmu_log: gmu event log memory
* @hfi: HFI controller
 * @num_gpupwrlevels: number of GPU frequencies in the GPU freq table
 * @num_bwlevel: number of GPU BW levels
 * @num_cnocbwlevel: number of CNOC BW levels
* @rpmh_votes: RPMh TCS command set for GPU, GMU voltage and bw scaling
* @clks: GPU subsystem clocks required for GMU functionality
* @wakeup_pwrlevel: GPU wake up power/DCVS level in case different
* than default power level
* @idle_level: Minimal GPU idle power level
* @fault_count: GMU fault count
* @log_wptr_retention: Store the log wptr offset on slumber
*/
struct gen8_gmu_device {
struct {
u32 core;
u32 core_dev;
u32 pwr;
u32 pwr_dev;
u32 hfi;
} ver;
struct platform_device *pdev;
int irq;
const struct firmware *fw_image;
struct kgsl_memdesc *dump_mem;
struct kgsl_memdesc *gmu_log;
/** @vrb: GMU virtual register bank memory */
struct kgsl_memdesc *vrb;
/** @trace: gmu trace container */
struct kgsl_gmu_trace trace;
/** @gmu_init_scratch: Memory to store the initial HFI messages */
struct kgsl_memdesc *gmu_init_scratch;
/** @gpu_boot_scratch: Memory to store the bootup HFI messages */
struct kgsl_memdesc *gpu_boot_scratch;
struct gen8_hfi hfi;
/** @pwrlevels: Array of GMU power levels */
struct clk_bulk_data *clks;
/** @num_clks: Number of entries in the @clks array */
int num_clks;
u32 idle_level;
/** @freqs: Array of GMU frequencies */
u32 freqs[GMU_MAX_PWRLEVELS];
/** @vlvls: Array of GMU voltage levels */
u32 vlvls[GMU_MAX_PWRLEVELS];
/** @qmp: aoss_qmp handle */
struct qmp *qmp;
/** @gmu_globals: Array to store gmu global buffers */
struct kgsl_memdesc gmu_globals[GMU_KERNEL_ENTRIES];
/** @global_entries: To keep track of number of gmu buffers */
u32 global_entries;
struct gmu_vma_entry *vma;
u32 log_wptr_retention;
/** @cm3_fault: whether gmu received a cm3 fault interrupt */
atomic_t cm3_fault;
/**
* @itcm_shadow: Copy of the itcm block in firmware binary used for
* snapshot
*/
void *itcm_shadow;
/** @flags: Internal gmu flags */
unsigned long flags;
/** @rscc_virt: Pointer where RSCC block is mapped */
void __iomem *rscc_virt;
/** @domain: IOMMU domain for the kernel context */
struct iommu_domain *domain;
/** @log_stream_enable: GMU log streaming enable. Disabled by default */
bool log_stream_enable;
/** @log_group_mask: Allows overriding default GMU log group mask */
u32 log_group_mask;
struct kobject log_kobj;
/*
* @perf_ddr_bw: The lowest ddr bandwidth that puts CX at a corner at
* which GMU can run at higher frequency.
*/
u32 perf_ddr_bw;
/** @rdpm_cx_virt: Pointer where the RDPM CX block is mapped */
void __iomem *rdpm_cx_virt;
/** @rdpm_mx_virt: Pointer where the RDPM MX block is mapped */
void __iomem *rdpm_mx_virt;
/** @num_oob_perfcntr: Number of active oob_perfcntr requests */
u32 num_oob_perfcntr;
/** @acd_debug_val: DVM value to calibrate ACD for a level */
u32 acd_debug_val;
/** @stats_enable: GMU stats feature enable */
bool stats_enable;
/** @stats_mask: GMU performance countables to enable */
u32 stats_mask;
/** @stats_interval: GMU performance counters sampling interval */
u32 stats_interval;
/** @stats_kobj: kernel object for GMU stats directory in sysfs */
struct kobject stats_kobj;
/** @cp_init_hdr: raw command header for cp_init */
u32 cp_init_hdr;
/** @switch_to_unsec_hdr: raw command header for switch to unsecure packet */
u32 switch_to_unsec_hdr;
/** @dcvs_table: Table for gpu dcvs levels */
struct gen8_dcvs_table dcvs_table;
};
/* Helper function to get to gen8 gmu device from adreno device */
struct gen8_gmu_device *to_gen8_gmu(struct adreno_device *adreno_dev);
/* Helper function to get to adreno device from gen8 gmu device */
struct adreno_device *gen8_gmu_to_adreno(struct gen8_gmu_device *gmu);
/**
* gen8_reserve_gmu_kernel_block() - Allocate a global gmu buffer
* @gmu: Pointer to the gen8 gmu device
* @addr: Desired gmu virtual address
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @align: Alignment for the GMU VA and GMU mapping size
*
* This function allocates a global gmu buffer and maps it in
* the desired gmu vma
*
* Return: Pointer to the memory descriptor or error pointer on failure
*/
struct kgsl_memdesc *gen8_reserve_gmu_kernel_block(struct gen8_gmu_device *gmu,
u32 addr, u32 size, u32 vma_id, u32 align);
/**
 * gen8_reserve_gmu_kernel_block_fixed() - Maps physical resource address to gmu
* @gmu: Pointer to the gen8 gmu device
* @addr: Desired gmu virtual address
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @resource: Name of the resource to get the size and address to allocate
* @attrs: Attributes for the mapping
* @align: Alignment for the GMU VA and GMU mapping size
*
 * This function maps the physical resource address to the desired gmu vma
*
* Return: Pointer to the memory descriptor or error pointer on failure
*/
struct kgsl_memdesc *gen8_reserve_gmu_kernel_block_fixed(struct gen8_gmu_device *gmu,
u32 addr, u32 size, u32 vma_id, const char *resource, int attrs, u32 align);
/**
* gen8_alloc_gmu_kernel_block() - Allocate a gmu buffer
* @gmu: Pointer to the gen8 gmu device
* @md: Pointer to the memdesc
* @size: Size of the buffer in bytes
* @vma_id: Target gmu vma where this buffer should be mapped
* @attrs: Attributes for the mapping
*
* This function allocates a buffer and maps it in the desired gmu vma
*
* Return: 0 on success or error code on failure
*/
int gen8_alloc_gmu_kernel_block(struct gen8_gmu_device *gmu,
struct kgsl_memdesc *md, u32 size, u32 vma_id, int attrs);
/**
* gen8_gmu_import_buffer() - Import a gmu buffer
* @gmu: Pointer to the gen8 gmu device
* @vma_id: Target gmu vma where this buffer should be mapped
* @md: Pointer to the memdesc to be mapped
* @attrs: Attributes for the mapping
* @align: Alignment for the GMU VA and GMU mapping size
*
* This function imports and maps a buffer to a gmu vma
*
* Return: 0 on success or error code on failure
*/
int gen8_gmu_import_buffer(struct gen8_gmu_device *gmu, u32 vma_id,
struct kgsl_memdesc *md, u32 attrs, u32 align);
/**
* gen8_free_gmu_block() - Free a gmu buffer
* @gmu: Pointer to the gen8 gmu device
* @md: Pointer to the memdesc that is to be freed
*
* This function frees a gmu block allocated by gen8_reserve_gmu_kernel_block()
*/
void gen8_free_gmu_block(struct gen8_gmu_device *gmu, struct kgsl_memdesc *md);
/**
* gen8_build_rpmh_tables - Build the rpmh tables
* @adreno_dev: Pointer to the adreno device
*
* This function creates the gpu dcvs and bw tables
*
* Return: 0 on success and negative error on failure
*/
int gen8_build_rpmh_tables(struct adreno_device *adreno_dev);
/**
* gen8_gmu_gx_is_on - Check if GX is on
* @adreno_dev: Pointer to the adreno device
*
* This function reads pwr status registers to check if GX
* is on or off
*/
bool gen8_gmu_gx_is_on(struct adreno_device *adreno_dev);
/**
 * gen8_gmu_device_probe - GEN8 GMU device probe function
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target specific probe function for gmu based gen8 targets.
*/
int gen8_gmu_device_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* gen8_gmu_reset - Reset and restart the gmu
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_reset(struct adreno_device *adreno_dev);
/**
* gen8_enable_gpu_irq - Enable gpu interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen8_enable_gpu_irq(struct adreno_device *adreno_dev);
/**
* gen8_disable_gpu_irq - Disable gpu interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen8_disable_gpu_irq(struct adreno_device *adreno_dev);
/**
* gen8_gmu_snapshot- Take snapshot for gmu targets
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot structure
*
* Send an NMI to gmu if we hit a gmu fault. Then take gmu
 * snapshot and carry on with the rest of the gen8 snapshot
*/
void gen8_gmu_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* gen8_gmu_probe - Probe gen8 gmu resources
* @device: Pointer to the kgsl device
* @pdev: Pointer to the gmu platform device
*
* Probe the gmu and hfi resources
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_probe(struct kgsl_device *device,
struct platform_device *pdev);
/**
* gen8_gmu_parse_fw - Parse the gmu fw binary
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_parse_fw(struct adreno_device *adreno_dev);
/**
* gen8_gmu_memory_init - Allocate gmu memory
* @adreno_dev: Pointer to the adreno device
*
 * Allocates the gmu log buffer and others if needed.
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_memory_init(struct adreno_device *adreno_dev);
/**
* gen8_gmu_aop_send_acd_state - Enable or disable acd feature in aop
* @gmu: Pointer to the gen8 gmu device
* @flag: Boolean to enable or disable acd in aop
*
* This function enables or disables gpu acd feature using qmp
*/
void gen8_gmu_aop_send_acd_state(struct gen8_gmu_device *gmu, bool flag);
/**
* gen8_gmu_load_fw - Load gmu firmware
* @adreno_dev: Pointer to the adreno device
*
* Loads the gmu firmware binary into TCMs and memory
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_load_fw(struct adreno_device *adreno_dev);
/**
* gen8_gmu_device_start - Bring gmu out of reset
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_device_start(struct adreno_device *adreno_dev);
/**
* gen8_gmu_hfi_start - Indicate hfi start to gmu
 * @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_hfi_start(struct adreno_device *adreno_dev);
/**
* gen8_gmu_itcm_shadow - Create itcm shadow copy for snapshot
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_itcm_shadow(struct adreno_device *adreno_dev);
/**
* gen8_gmu_register_config - gmu register configuration
* @adreno_dev: Pointer to the adreno device
*
 * Program gmu registers based on features
*/
void gen8_gmu_register_config(struct adreno_device *adreno_dev);
/**
* gen8_gmu_version_info - Get gmu firmware version
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_version_info(struct adreno_device *adreno_dev);
/**
* gen8_gmu_irq_enable - Enable gmu interrupts
* @adreno_dev: Pointer to the adreno device
*/
void gen8_gmu_irq_enable(struct adreno_device *adreno_dev);
/**
 * gen8_gmu_irq_disable - Disable gmu interrupts
* @adreno_dev: Pointer to the adreno device
*/
void gen8_gmu_irq_disable(struct adreno_device *adreno_dev);
/**
* gen8_gmu_suspend - Hard reset the gpu and gmu
* @adreno_dev: Pointer to the adreno device
*
* In case we hit a gmu fault, hard reset the gpu and gmu
* to recover from the fault
*/
void gen8_gmu_suspend(struct adreno_device *adreno_dev);
/**
* gen8_gmu_oob_set - send gmu oob request
* @device: Pointer to the kgsl device
 * @oob: Type of oob request as defined in enum oob_request
*
* Request gmu to keep gpu powered up till the oob is cleared
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_oob_set(struct kgsl_device *device, enum oob_request oob);
/**
* gen8_gmu_oob_clear - clear an asserted oob request
* @device: Pointer to the kgsl device
 * @oob: Type of oob request as defined in enum oob_request
*
* Clear a previously requested oob so that gmu can power
* collapse the gpu
*/
void gen8_gmu_oob_clear(struct kgsl_device *device, enum oob_request oob);
/**
* gen8_gmu_wait_for_lowest_idle - wait for gmu to complete ifpc
* @adreno_dev: Pointer to the adreno device
*
* If ifpc is enabled, wait for gmu to put gpu into ifpc.
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_wait_for_lowest_idle(struct adreno_device *adreno_dev);
/**
* gen8_gmu_wait_for_idle - Wait for gmu to become idle
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_wait_for_idle(struct adreno_device *adreno_dev);
/**
* gen8_rscc_sleep_sequence - Trigger rscc sleep sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_rscc_sleep_sequence(struct adreno_device *adreno_dev);
/**
* gen8_rscc_wakeup_sequence - Trigger rscc wakeup sequence
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_rscc_wakeup_sequence(struct adreno_device *adreno_dev);
/**
* gen8_halt_gbif - Halt CX and GX requests in GBIF
* @adreno_dev: Pointer to the adreno device
*
* Clear any pending GX or CX transactions in GBIF and
* deassert GBIF halt
*
* Return: 0 on success or negative error on failure
*/
int gen8_halt_gbif(struct adreno_device *adreno_dev);
/**
* gen8_gmu_remove - Clean up gmu probed resources
* @device: Pointer to the kgsl device
*/
void gen8_gmu_remove(struct kgsl_device *device);
/**
* gen8_gmu_enable_clks - Enable gmu clocks
* @adreno_dev: Pointer to the adreno device
* @level: GMU frequency level
*
* Return: 0 on success or negative error on failure
*/
int gen8_gmu_enable_clks(struct adreno_device *adreno_dev, u32 level);
/**
* gen8_gmu_handle_watchdog - Handle watchdog interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen8_gmu_handle_watchdog(struct adreno_device *adreno_dev);
/**
* gen8_gmu_send_nmi - Send NMI to GMU
* @device: Pointer to the kgsl device
* @force: Boolean to forcefully send NMI irrespective of GMU state
*/
void gen8_gmu_send_nmi(struct kgsl_device *device, bool force);
/**
* gen8_gmu_add_to_minidump - Register gen8_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int gen8_gmu_add_to_minidump(struct adreno_device *adreno_dev);
/**
* gen8_snapshot_gmu_mem - Snapshot a GMU memory descriptor
* @device: Pointer to the kgsl device
* @buf: Destination snapshot buffer
* @remain: Remaining size of the snapshot buffer
* @priv: Opaque handle
*
* Return: Number of bytes written to snapshot buffer
*/
size_t gen8_snapshot_gmu_mem(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv);
/**
* gen8_bus_ab_quantize - Calculate the AB vote that needs to be sent to GMU
* @adreno_dev: Handle to the adreno device
* @ab: ab request that needs to be scaled in MBps
*
* Returns the AB value that needs to be prefixed to bandwidth vote in kbps
*/
u32 gen8_bus_ab_quantize(struct adreno_device *adreno_dev, u32 ab);
/**
* gen8_gmu_rpmh_pwr_state_is_active - Check the state of GPU HW
* @device: Pointer to the kgsl device
*
* Returns true on active or false otherwise
*/
bool gen8_gmu_rpmh_pwr_state_is_active(struct kgsl_device *device);
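/*
 * Hedged usage sketch, not part of the driver: reserving and releasing a
 * global GMU buffer with the helpers declared above. The GMU_NONCACHED_KERNEL
 * vma id mirrors how the HFI queue memory is reserved in adreno_gen8_hfi.c;
 * the SZ_4K size is illustrative, and linux/sizes.h and linux/err.h are
 * assumed to be available through the existing includes.
 */
static inline int example_reserve_gmu_buffer(struct gen8_gmu_device *gmu)
{
	struct kgsl_memdesc *md;

	/* addr = 0 lets the allocator pick the GMU VA, align = 0 uses the default */
	md = gen8_reserve_gmu_kernel_block(gmu, 0, SZ_4K, GMU_NONCACHED_KERNEL, 0);
	if (IS_ERR(md))
		return PTR_ERR(md);

	/* ... use md->hostptr and md->gmuaddr ... */

	gen8_free_gmu_block(gmu, md);
	return 0;
}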
#endif


@ -0,0 +1,310 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_gen8_gmu.h"
#include "adreno_gen8_3_0_snapshot.h"
#include "adreno_snapshot.h"
#include "gen8_reg.h"
#include "kgsl_device.h"
size_t gen8_snapshot_gmu_mem(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
struct gmu_mem_type_desc *desc = priv;
if (priv == NULL || desc->memdesc->hostptr == NULL)
return 0;
if (remain < desc->memdesc->size + sizeof(*mem_hdr)) {
dev_err(device->dev,
"snapshot: Not enough memory for the gmu section %d\n",
desc->type);
return 0;
}
mem_hdr->type = desc->type;
mem_hdr->hostaddr = (u64)(uintptr_t)desc->memdesc->hostptr;
mem_hdr->gmuaddr = desc->memdesc->gmuaddr;
mem_hdr->gpuaddr = 0;
/* The hw fence queues are mapped as iomem in the kernel */
if (desc->type == SNAPSHOT_GMU_MEM_HW_FENCE)
memcpy_fromio(data, desc->memdesc->hostptr, desc->memdesc->size);
else
memcpy(data, desc->memdesc->hostptr, desc->memdesc->size);
return desc->memdesc->size + sizeof(*mem_hdr);
}
static size_t gen8_gmu_snapshot_dtcm(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv;
u32 *data = (u32 *)(buf + sizeof(*mem_hdr));
u32 i;
if (remain < gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr)) {
SNAPSHOT_ERR_NOMEM(device, "GMU DTCM Memory");
return 0;
}
mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
mem_hdr->hostaddr = 0;
mem_hdr->gmuaddr = gmu->vma[GMU_DTCM].start;
mem_hdr->gpuaddr = 0;
/*
* Read of GMU TCMs over side-band debug controller interface is
* supported on gen8 family
* region [20]: Dump ITCM/DTCM. Select 1 for DTCM.
* autoInc [31]: Autoincrement the address field after each
* access to TCM_DBG_DATA
*/
kgsl_regwrite(device, GEN8_CX_DBGC_TCM_DBG_ADDR, BIT(20) | BIT(31));
for (i = 0; i < (gmu->vma[GMU_DTCM].size >> 2); i++)
kgsl_regread(device, GEN8_CX_DBGC_TCM_DBG_DATA, data++);
return gmu->vma[GMU_DTCM].size + sizeof(*mem_hdr);
}
static size_t gen8_gmu_snapshot_itcm(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_gmu_mem *mem_hdr =
(struct kgsl_snapshot_gmu_mem *)buf;
void *dest = buf + sizeof(*mem_hdr);
struct gen8_gmu_device *gmu = (struct gen8_gmu_device *)priv;
if (!gmu->itcm_shadow) {
dev_err(&gmu->pdev->dev, "No memory allocated for ITCM shadow capture\n");
return 0;
}
if (remain < gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr)) {
SNAPSHOT_ERR_NOMEM(device, "GMU ITCM Memory");
return 0;
}
mem_hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
mem_hdr->hostaddr = 0;
mem_hdr->gmuaddr = gmu->vma[GMU_ITCM].start;
mem_hdr->gpuaddr = 0;
memcpy(dest, gmu->itcm_shadow, gmu->vma[GMU_ITCM].size);
return gmu->vma[GMU_ITCM].size + sizeof(*mem_hdr);
}
static void gen8_gmu_snapshot_memories(struct kgsl_device *device,
struct gen8_gmu_device *gmu, struct kgsl_snapshot *snapshot)
{
struct gmu_mem_type_desc desc;
struct kgsl_memdesc *md;
int i;
for (i = 0; i < ARRAY_SIZE(gmu->gmu_globals); i++) {
md = &gmu->gmu_globals[i];
if (!md->size)
continue;
desc.memdesc = md;
if (md == gmu->hfi.hfi_mem)
desc.type = SNAPSHOT_GMU_MEM_HFI;
else if (md == gmu->gmu_log)
desc.type = SNAPSHOT_GMU_MEM_LOG;
else if (md == gmu->dump_mem)
desc.type = SNAPSHOT_GMU_MEM_DEBUG;
else if ((md == gmu->gmu_init_scratch) || (md == gmu->gpu_boot_scratch))
desc.type = SNAPSHOT_GMU_MEM_WARMBOOT;
else if (md == gmu->vrb)
desc.type = SNAPSHOT_GMU_MEM_VRB;
else if (md == gmu->trace.md)
desc.type = SNAPSHOT_GMU_MEM_TRACE;
else
desc.type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, gen8_snapshot_gmu_mem, &desc);
}
}
struct kgsl_snapshot_gmu_version {
u32 type;
u32 value;
};
static size_t gen8_snapshot_gmu_version(struct kgsl_device *device,
u8 *buf, size_t remain, void *priv)
{
struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
u32 *data = (u32 *) (buf + sizeof(*header));
struct kgsl_snapshot_gmu_version *ver = priv;
if (remain < DEBUG_SECTION_SZ(1)) {
SNAPSHOT_ERR_NOMEM(device, "GMU Version");
return 0;
}
header->type = ver->type;
header->size = 1;
*data = ver->value;
return DEBUG_SECTION_SZ(1);
}
static void gen8_gmu_snapshot_versions(struct kgsl_device *device,
struct gen8_gmu_device *gmu,
struct kgsl_snapshot *snapshot)
{
int i;
struct kgsl_snapshot_gmu_version gmu_vers[] = {
{ .type = SNAPSHOT_DEBUG_GMU_CORE_VERSION,
.value = gmu->ver.core, },
{ .type = SNAPSHOT_DEBUG_GMU_CORE_DEV_VERSION,
.value = gmu->ver.core_dev, },
{ .type = SNAPSHOT_DEBUG_GMU_PWR_VERSION,
.value = gmu->ver.pwr, },
{ .type = SNAPSHOT_DEBUG_GMU_PWR_DEV_VERSION,
.value = gmu->ver.pwr_dev, },
{ .type = SNAPSHOT_DEBUG_GMU_HFI_VERSION,
.value = gmu->ver.hfi, },
};
for (i = 0; i < ARRAY_SIZE(gmu_vers); i++)
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
snapshot, gen8_snapshot_gmu_version,
&gmu_vers[i]);
}
#define RSCC_OFFSET_DWORDS 0x14000
static size_t gen8_snapshot_rscc_registers(struct kgsl_device *device, u8 *buf,
size_t remain, void *priv)
{
const u32 *regs = priv;
u32 *data = (u32 *)buf;
int count = 0, k;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
/* Figure out how many registers we are going to dump */
count = adreno_snapshot_regs_count(regs);
if (remain < (count * 4)) {
SNAPSHOT_ERR_NOMEM(device, "RSCC REGISTERS");
return 0;
}
for (regs = priv; regs[0] != UINT_MAX; regs += 2) {
u32 cnt = REG_COUNT(regs);
if (cnt == 1) {
*data++ = BIT(31) | regs[0];
*data++ = __raw_readl(gmu->rscc_virt +
((regs[0] - RSCC_OFFSET_DWORDS) << 2));
continue;
}
*data++ = regs[0];
*data++ = cnt;
for (k = regs[0]; k <= regs[1]; k++)
*data++ = __raw_readl(gmu->rscc_virt +
((k - RSCC_OFFSET_DWORDS) << 2));
}
/* Return the size of the section */
return (count * 4);
}
/*
* gen8_gmu_device_snapshot() - GEN8 GMU snapshot function
* @device: Device being snapshotted
* @snapshot: Pointer to the snapshot instance
*
* This is where all of the GEN8 GMU specific bits and pieces are grabbed
* into the snapshot memory
*/
static void gen8_gmu_device_snapshot(struct kgsl_device *device,
struct kgsl_snapshot *snapshot)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
const struct adreno_gen8_core *gpucore = to_gen8_core(ADRENO_DEVICE(device));
const struct gen8_snapshot_block_list *gen8_snapshot_block_list =
gpucore->gen8_snapshot_block_list;
u32 i, slice, j;
struct gen8_reg_list_info info = {0};
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, gen8_gmu_snapshot_itcm, gmu);
gen8_gmu_snapshot_versions(device, gmu, snapshot);
gen8_gmu_snapshot_memories(device, gmu, snapshot);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
gen8_snapshot_rscc_registers, (void *) gen8_snapshot_block_list->rscc_regs);
/* Capture GMU registers which are on CX domain and unsliced */
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS_V2, snapshot,
adreno_snapshot_registers_v2,
(void *) gen8_snapshot_block_list->gmu_cx_unsliced_regs);
if (!gen8_gmu_rpmh_pwr_state_is_active(device) ||
!gen8_gmu_gx_is_on(adreno_dev))
goto dtcm;
/* Set fence to ALLOW mode so registers can be read */
kgsl_regwrite(device, GEN8_GMUAO_AHB_FENCE_CTRL, 0);
/* Capture GMU registers which are on GX domain */
for (i = 0 ; i < gen8_snapshot_block_list->num_gmu_gx_regs; i++) {
struct gen8_reg_list *regs = &gen8_snapshot_block_list->gmu_gx_regs[i];
slice = regs->slice_region ? MAX_PHYSICAL_SLICES : 1;
for (j = 0 ; j < slice; j++) {
info.regs = regs;
info.slice_id = SLICE_ID(regs->slice_region, j);
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MVC_V3, snapshot,
gen8_legacy_snapshot_registers, &info);
}
}
dtcm:
kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GMU_MEMORY,
snapshot, gen8_gmu_snapshot_dtcm, gmu);
}
void gen8_gmu_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/*
* Dump external register first to have GPUCC and other external
* register in snapshot to analyze the system state even in partial
* snapshot dump
*/
gen8_snapshot_external_core_regs(device, snapshot);
gen8_gmu_device_snapshot(device, snapshot);
gen8_snapshot(adreno_dev, snapshot);
gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, UINT_MAX);
gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_MASK, HFI_IRQ_MASK);
}
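/*
 * Hedged sketch, not part of the driver: the minimal shape of a snapshot
 * section callback as used throughout this file. A callback validates
 * @remain, fills a header plus payload into @buf and returns the number of
 * bytes written, or logs and returns 0 so the core skips the section. Such a
 * callback would be passed to kgsl_snapshot_add_section() as above. The
 * section name and the zero-filled payload are placeholders.
 */
static size_t example_snapshot_section(struct kgsl_device *device,
	u8 *buf, size_t remain, void *priv)
{
	struct kgsl_snapshot_gmu_mem *hdr = (struct kgsl_snapshot_gmu_mem *)buf;
	u32 *data = (u32 *)(buf + sizeof(*hdr));
	size_t payload = 4 * sizeof(u32); /* illustrative payload size */

	if (remain < payload + sizeof(*hdr)) {
		SNAPSHOT_ERR_NOMEM(device, "EXAMPLE SECTION");
		return 0;
	}

	hdr->type = SNAPSHOT_GMU_MEM_BIN_BLOCK;
	hdr->hostaddr = 0;
	hdr->gmuaddr = 0;
	hdr->gpuaddr = 0;
	memset(data, 0, payload);

	return payload + sizeof(*hdr);
}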


@ -0,0 +1,831 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/delay.h>
#include <linux/nvmem-consumer.h>
#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_gen8_gmu.h"
#include "adreno_gen8_hfi.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
/* Below section is for all structures related to HFI queues */
#define HFI_QUEUE_MAX HFI_QUEUE_DEFAULT_CNT
/* Total header sizes + queue sizes + 16 for alignment */
#define HFIMEM_SIZE (sizeof(struct hfi_queue_table) + 16 + \
(HFI_QUEUE_SIZE * HFI_QUEUE_MAX))
#define HOST_QUEUE_START_ADDR(hfi_mem, i) \
((hfi_mem)->hostptr + HFI_QUEUE_OFFSET(i))
struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
return &gmu->hfi;
}
/* Size in below functions are in unit of dwords */
int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx,
u32 *output, u32 max_size)
{
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
struct hfi_queue_table *tbl = mem_addr->hostptr;
struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
u32 *queue;
u32 msg_hdr;
u32 i, read;
u32 size;
int result = 0;
if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
return -EINVAL;
if (hdr->read_index == hdr->write_index)
return -ENODATA;
/* Clear the output data before populating */
memset(output, 0, max_size);
queue = HOST_QUEUE_START_ADDR(mem_addr, queue_idx);
msg_hdr = queue[hdr->read_index];
size = MSG_HDR_GET_SIZE(msg_hdr);
if (size > (max_size >> 2)) {
dev_err(&gmu->pdev->dev,
"HFI message too big: hdr:0x%x rd idx=%d\n",
msg_hdr, hdr->read_index);
result = -EMSGSIZE;
goto done;
}
read = hdr->read_index;
if (read < hdr->queue_size) {
for (i = 0; i < size && i < (max_size >> 2); i++) {
output[i] = queue[read];
read = (read + 1)%hdr->queue_size;
}
result = size;
} else {
/* In case FW messed up */
dev_err(&gmu->pdev->dev,
"Read index %d greater than queue size %d\n",
hdr->read_index, hdr->queue_size);
result = -ENODATA;
}
read = ALIGN(read, SZ_4) % hdr->queue_size;
hfi_update_read_idx(hdr, read);
/* For acks, trace the packet for which this ack was sent */
if (MSG_HDR_GET_TYPE(msg_hdr) == HFI_MSG_ACK)
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(output[1]),
MSG_HDR_GET_SIZE(output[1]),
MSG_HDR_GET_SEQNUM(output[1]));
else
trace_kgsl_hfi_receive(MSG_HDR_GET_ID(msg_hdr),
MSG_HDR_GET_SIZE(msg_hdr), MSG_HDR_GET_SEQNUM(msg_hdr));
done:
return result;
}
int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
u32 *msg, u32 size_bytes)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct hfi_queue_table *tbl = gmu->hfi.hfi_mem->hostptr;
struct hfi_queue_header *hdr = &tbl->qhdr[queue_idx];
u32 *queue;
u32 i, write_idx, read_idx, empty_space;
u32 size_dwords = size_bytes >> 2;
u32 align_size = ALIGN(size_dwords, SZ_4);
u32 id = MSG_HDR_GET_ID(*msg);
if (hdr->status == HFI_QUEUE_STATUS_DISABLED || !IS_ALIGNED(size_bytes, sizeof(u32)))
return -EINVAL;
queue = HOST_QUEUE_START_ADDR(gmu->hfi.hfi_mem, queue_idx);
write_idx = hdr->write_index;
read_idx = hdr->read_index;
empty_space = (write_idx >= read_idx) ?
(hdr->queue_size - (write_idx - read_idx))
: (read_idx - write_idx);
if (empty_space <= align_size)
return -ENOSPC;
for (i = 0; i < size_dwords; i++) {
queue[write_idx] = msg[i];
write_idx = (write_idx + 1) % hdr->queue_size;
}
/* Fill any unused dwords at the end of the aligned write with a cookie value */
for (; i < align_size; i++) {
queue[write_idx] = 0xfafafafa;
write_idx = (write_idx + 1) % hdr->queue_size;
}
trace_kgsl_hfi_send(id, size_dwords, MSG_HDR_GET_SEQNUM(*msg));
hfi_update_write_idx(&hdr->write_index, write_idx);
return 0;
}
int gen8_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct gen8_hfi *hfi = &gmu->hfi;
int ret;
spin_lock(&hfi->cmdq_lock);
if (test_bit(MSG_HDR_GET_ID(msg[0]), hfi->wb_set_record_bitmask))
*msg = RECORD_MSG_HDR(*msg);
ret = gen8_hfi_queue_write(adreno_dev, HFI_CMD_ID, msg, size_bytes);
/*
* Some messages like ACD table and perf table are saved in memory, so we need
 * to reset the header to make sure we do not send a record enabled bit in case
* we change the warmboot setting from debugfs
*/
*msg = CLEAR_RECORD_MSG_HDR(*msg);
/*
* Memory barrier to make sure packet and write index are written before
* an interrupt is raised
*/
wmb();
/* Send interrupt to GMU to receive the message */
if (!ret)
gmu_core_regwrite(KGSL_DEVICE(adreno_dev),
GEN8_GMUCX_HOST2GMU_INTR_SET, 0x1);
spin_unlock(&hfi->cmdq_lock);
return ret;
}
/* Sizes of the queue and message are in unit of dwords */
static void init_queues(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
int i;
struct hfi_queue_table *tbl;
struct hfi_queue_header *hdr;
struct {
u32 idx;
u32 pri;
u32 status;
} queue[HFI_QUEUE_MAX] = {
{ HFI_CMD_ID, HFI_CMD_PRI, HFI_QUEUE_STATUS_ENABLED },
{ HFI_MSG_ID, HFI_MSG_PRI, HFI_QUEUE_STATUS_ENABLED },
{ HFI_DBG_ID, HFI_DBG_PRI, HFI_QUEUE_STATUS_ENABLED },
};
/* Fill Table Header */
tbl = mem_addr->hostptr;
tbl->qtbl_hdr.version = 0;
tbl->qtbl_hdr.size = sizeof(struct hfi_queue_table) >> 2;
tbl->qtbl_hdr.qhdr0_offset = sizeof(struct hfi_queue_table_header) >> 2;
tbl->qtbl_hdr.qhdr_size = sizeof(struct hfi_queue_header) >> 2;
tbl->qtbl_hdr.num_q = HFI_QUEUE_MAX;
tbl->qtbl_hdr.num_active_q = HFI_QUEUE_MAX;
memset(&tbl->qhdr[0], 0, sizeof(tbl->qhdr));
/* Fill Individual Queue Headers */
for (i = 0; i < HFI_QUEUE_MAX; i++) {
hdr = &tbl->qhdr[i];
hdr->start_addr = GMU_QUEUE_START_ADDR(mem_addr->gmuaddr, i);
hdr->type = QUEUE_HDR_TYPE(queue[i].idx, queue[i].pri, 0, 0);
hdr->status = queue[i].status;
hdr->queue_size = HFI_QUEUE_SIZE >> 2; /* convert to dwords */
}
}
int gen8_hfi_init(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct gen8_hfi *hfi = &gmu->hfi;
/* Allocates & maps memory for HFI */
if (IS_ERR_OR_NULL(hfi->hfi_mem)) {
hfi->hfi_mem = gen8_reserve_gmu_kernel_block(gmu, 0,
HFIMEM_SIZE, GMU_NONCACHED_KERNEL, 0);
if (!IS_ERR(hfi->hfi_mem))
init_queues(adreno_dev);
}
return PTR_ERR_OR_ZERO(hfi->hfi_mem);
}
int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd,
struct pending_cmd *ret_cmd)
{
struct adreno_device *adreno_dev = gen8_gmu_to_adreno(gmu);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 *ack = rcvd;
u32 hdr = ack[0];
u32 req_hdr = ack[1];
if (ret_cmd == NULL)
return -EINVAL;
if (CMP_HFI_ACK_HDR(ret_cmd->sent_hdr, req_hdr)) {
memcpy(&ret_cmd->results, ack, MSG_HDR_GET_SIZE(hdr) << 2);
return 0;
}
/* Didn't find the sender, list the waiter */
dev_err_ratelimited(&gmu->pdev->dev,
"HFI ACK: Cannot find sender for 0x%8.8x Waiter: 0x%8.8x\n",
req_hdr, ret_cmd->sent_hdr);
gmu_core_fault_snapshot(device);
return -ENODEV;
}
static int poll_gmu_reg(struct adreno_device *adreno_dev,
u32 offsetdwords, u32 expected_val,
u32 mask, u32 timeout_ms)
{
u32 val;
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
bool nmi = false;
while (time_is_after_jiffies(timeout)) {
gmu_core_regread(device, offsetdwords, &val);
if ((val & mask) == expected_val)
return 0;
/*
* If GMU firmware fails any assertion, error message is sent
* to KMD and NMI is triggered. So check if GMU is in NMI and
 * timeout early. Bits [11:9] of GEN8_GMUCX_CM3_FW_INIT_RESULT
* contain GMU reset status. Non zero value here indicates that
* GMU reset is active, NMI handler would eventually complete
* and GMU would wait for recovery.
*/
gmu_core_regread(device, GEN8_GMUCX_CM3_FW_INIT_RESULT, &val);
if (val & 0xE00) {
nmi = true;
break;
}
usleep_range(10, 100);
}
/* Check one last time */
gmu_core_regread(device, offsetdwords, &val);
if ((val & mask) == expected_val)
return 0;
dev_err(&gmu->pdev->dev,
"Reg poll %s: offset 0x%x, want 0x%x, got 0x%x\n",
nmi ? "abort" : "timeout", offsetdwords, expected_val,
val & mask);
return -ETIMEDOUT;
}
static int gen8_hfi_send_cmd_wait_inline(struct adreno_device *adreno_dev,
void *data, u32 size_bytes, struct pending_cmd *ret_cmd)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int rc;
u32 *cmd = data;
struct gen8_hfi *hfi = &gmu->hfi;
u32 seqnum = atomic_inc_return(&hfi->seqnum);
*cmd = MSG_HDR_SET_SEQNUM_SIZE(*cmd, seqnum, size_bytes >> 2);
if (ret_cmd == NULL)
return gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
ret_cmd->sent_hdr = cmd[0];
rc = gen8_hfi_cmdq_write(adreno_dev, cmd, size_bytes);
if (rc)
return rc;
rc = poll_gmu_reg(adreno_dev, GEN8_GMUCX_GMU2HOST_INTR_INFO,
HFI_IRQ_MSGQ_MASK, HFI_IRQ_MSGQ_MASK, HFI_RSP_TIMEOUT);
if (rc) {
gmu_core_fault_snapshot(device);
dev_err(&gmu->pdev->dev,
"Timed out waiting on ack for 0x%8.8x (id %d, sequence %d)\n",
cmd[0], MSG_HDR_GET_ID(*cmd), MSG_HDR_GET_SEQNUM(*cmd));
return rc;
}
/* Clear the interrupt */
gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR,
HFI_IRQ_MSGQ_MASK);
rc = gen8_hfi_process_queue(gmu, HFI_MSG_ID, ret_cmd);
return rc;
}
int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes)
{
struct pending_cmd ret_cmd;
int rc;
memset(&ret_cmd, 0, sizeof(ret_cmd));
rc = gen8_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, &ret_cmd);
if (rc)
return rc;
if (ret_cmd.results[2]) {
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
gmu_core_fault_snapshot(device);
dev_err(&gmu->pdev->dev,
"HFI ACK failure: Req=0x%8.8X, Result=0x%8.8X\n",
ret_cmd.results[1],
ret_cmd.results[2]);
return -EINVAL;
}
return 0;
}
int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev)
{
struct hfi_core_fw_start_cmd cmd = {
.handle = 0x0,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_CORE_FW_START);
if (ret)
return ret;
return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
static const char *feature_to_string(u32 feature)
{
if (feature == HFI_FEATURE_ACD)
return "ACD";
return "unknown";
}
/* For sending hfi message inline to handle GMU return type error */
int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
struct pending_cmd *ret_cmd, u32 size_bytes)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
int rc;
if (GMU_VER_MINOR(gmu->ver.hfi) <= 4)
return gen8_hfi_send_generic_req(adreno_dev, cmd, size_bytes);
rc = gen8_hfi_send_cmd_wait_inline(adreno_dev, cmd, size_bytes, ret_cmd);
if (rc)
return rc;
switch (ret_cmd->results[3]) {
case GMU_SUCCESS:
rc = ret_cmd->results[2];
break;
case GMU_ERROR_NO_ENTRY:
/* Unique error to handle undefined HFI msgs by caller */
rc = -ENOENT;
break;
case GMU_ERROR_TIMEOUT:
rc = -EINVAL;
break;
default:
gmu_core_fault_snapshot(KGSL_DEVICE(adreno_dev));
dev_err(&gmu->pdev->dev,
"HFI ACK: Req=0x%8.8X, Result=0x%8.8X Error:0x%8.8X\n",
ret_cmd->results[1], ret_cmd->results[2], ret_cmd->results[3]);
rc = -EINVAL;
break;
}
return rc;
}
int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
u32 feature, u32 enable, u32 data)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct pending_cmd ret_cmd = {0};
struct hfi_feature_ctrl_cmd cmd = {
.feature = feature,
.enable = enable,
.data = data,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_FEATURE_CTRL);
if (ret)
return ret;
ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
if (ret < 0)
dev_err(&gmu->pdev->dev,
"Unable to %s feature %s (%d)\n",
enable ? "enable" : "disable",
feature_to_string(feature),
feature);
return ret;
}
int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct pending_cmd ret_cmd = {0};
struct hfi_get_value_cmd cmd = {
.type = type,
.subtype = subtype,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_GET_VALUE);
if (ret)
return ret;
ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
if (ret < 0)
dev_err(&gmu->pdev->dev,
"Unable to get HFI Value type: %d, subtype: %d, error = %d\n",
type, subtype, ret);
return ret;
}
int gen8_hfi_send_set_value(struct adreno_device *adreno_dev,
u32 type, u32 subtype, u32 data)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct pending_cmd ret_cmd = {0};
struct hfi_set_value_cmd cmd = {
.type = type,
.subtype = subtype,
.data = data,
};
int ret;
ret = CMD_MSG_HDR(cmd, H2F_MSG_SET_VALUE);
if (ret)
return ret;
ret = gen8_hfi_send_generic_req_v5(adreno_dev, &cmd, &ret_cmd, sizeof(cmd));
if (ret < 0)
dev_err(&gmu->pdev->dev,
"Unable to set HFI Value %d, %d to %d, error = %d\n",
type, subtype, data, ret);
return ret;
}
void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd)
{
struct hfi_err_cmd *cmd = rcvd;
dev_err(&gmu->pdev->dev, "HFI Error Received: %d %d %.16s\n",
((cmd->error_code >> 16) & 0xffff),
(cmd->error_code & 0xffff),
(char *) cmd->data);
}
void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd)
{
struct hfi_debug_cmd *cmd = rcvd;
dev_dbg(&gmu->pdev->dev, "HFI Debug Received: %d %d %d\n",
cmd->type, cmd->timestamp, cmd->data);
}
int gen8_hfi_process_queue(struct gen8_gmu_device *gmu,
u32 queue_idx, struct pending_cmd *ret_cmd)
{
u32 rcvd[MAX_RCVD_SIZE];
while (gen8_hfi_queue_read(gmu, queue_idx, rcvd, sizeof(rcvd)) > 0) {
/* ACK Handler */
if (MSG_HDR_GET_TYPE(rcvd[0]) == HFI_MSG_ACK) {
int ret = gen8_receive_ack_cmd(gmu, rcvd, ret_cmd);
if (ret)
return ret;
continue;
}
/* Request Handler */
switch (MSG_HDR_GET_ID(rcvd[0])) {
case F2H_MSG_ERR: /* No Reply */
adreno_gen8_receive_err_req(gmu, rcvd);
break;
case F2H_MSG_DEBUG: /* No Reply */
adreno_gen8_receive_debug_req(gmu, rcvd);
break;
default: /* No Reply */
dev_err(&gmu->pdev->dev,
"HFI request %d not supported\n",
MSG_HDR_GET_ID(rcvd[0]));
break;
}
}
return 0;
}
int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev)
{
if (!adreno_dev->bcl_enabled)
return 0;
/*
* BCL data is expected by gmu in below format
* BIT[0] - response type
* BIT[1:7] - Throttle level 1 (optional)
* BIT[8:14] - Throttle level 2 (optional)
* BIT[15:21] - Throttle level 3 (optional)
*/
return gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_BCL, 1, adreno_dev->bcl_data);
}
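/*
 * Hedged example, not part of the driver: packing three illustrative throttle
 * levels into bcl_data using the bit layout documented above. The helper name
 * and the level values are hypothetical; FIELD_PREP()/GENMASK() are used the
 * same way as in gen8_hfi_send_clx_feature_ctrl() below.
 */
static u32 example_pack_bcl_data(u32 lvl1, u32 lvl2, u32 lvl3)
{
	return FIELD_PREP(GENMASK(0, 0), 1) |		/* response type */
		FIELD_PREP(GENMASK(7, 1), lvl1) |	/* throttle level 1 */
		FIELD_PREP(GENMASK(14, 8), lvl2) |	/* throttle level 2 */
		FIELD_PREP(GENMASK(21, 15), lvl3);	/* throttle level 3 */
}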
int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev)
{
int ret = 0;
struct hfi_clx_table_v2_cmd cmd = {0};
if (!adreno_dev->clx_enabled)
return 0;
/* Make sure the table is valid before enabling feature */
ret = CMD_MSG_HDR(cmd, H2F_MSG_CLX_TBL);
if (ret)
return ret;
ret = gen8_hfi_send_feature_ctrl(adreno_dev, HFI_FEATURE_CLX, 1, 0);
if (ret)
return ret;
cmd.version = FIELD_PREP(GENMASK(31, 16), 0x2) | FIELD_PREP(GENMASK(15, 0), 0x1);
/* cmd.domain[0] is never used but needed per hfi spec */
cmd.domain[1].data0 = FIELD_PREP(GENMASK(31, 29), 1) |
FIELD_PREP(GENMASK(28, 28), 1) |
FIELD_PREP(GENMASK(27, 22), 1) |
FIELD_PREP(GENMASK(21, 16), 40) |
FIELD_PREP(GENMASK(15, 0), 0);
cmd.domain[1].clxt = 0;
cmd.domain[1].clxh = 0;
cmd.domain[1].urgmode = 1;
cmd.domain[1].lkgen = 0;
cmd.domain[1].currbudget = 50;
return gen8_hfi_send_generic_req(adreno_dev, &cmd, sizeof(cmd));
}
#define EVENT_PWR_ACD_THROTTLE_PROF 44
int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
int ret = 0;
if (adreno_dev->acd_enabled) {
ret = gen8_hfi_send_feature_ctrl(adreno_dev,
HFI_FEATURE_ACD, 1, 0);
if (ret)
return ret;
ret = gen8_hfi_send_generic_req(adreno_dev,
&gmu->hfi.acd_table, sizeof(gmu->hfi.acd_table));
if (ret)
return ret;
gen8_hfi_send_set_value(adreno_dev, HFI_VALUE_LOG_EVENT_ON,
EVENT_PWR_ACD_THROTTLE_PROF, 0);
}
return 0;
}
int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
if (gmu->idle_level == GPU_HW_IFPC)
return gen8_hfi_send_feature_ctrl(adreno_dev,
HFI_FEATURE_IFPC, 1, adreno_dev->ifpc_hyst);
return 0;
}
static void reset_hfi_queues(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_memdesc *mem_addr = gmu->hfi.hfi_mem;
struct hfi_queue_table *tbl = mem_addr->hostptr;
struct hfi_queue_header *hdr;
u32 i;
/* Flush HFI queues */
for (i = 0; i < HFI_QUEUE_MAX; i++) {
hdr = &tbl->qhdr[i];
if (hdr->status == HFI_QUEUE_STATUS_DISABLED)
continue;
hdr->read_index = hdr->write_index;
}
}
/* Fill the entry and return the dword count written */
static u32 _fill_table_entry(struct hfi_table_entry *entry, u32 count,
u32 stride_bytes, u32 *data)
{
entry->count = count;
entry->stride = stride_bytes >> 2; /* entry->stride is in dwords */
memcpy(entry->data, data, stride_bytes * count);
/* Return total dword count of entry + data */
return (sizeof(*entry) >> 2) + (entry->count * entry->stride);
}
int gen8_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev)
{
/*
* Buffer to store either hfi_table_cmd or hfi_dcvstable_cmd.
* Current max size for either is 165 dwords.
*/
static u32 cmd_buf[200];
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct gen8_dcvs_table *tbl = &gmu->dcvs_table;
int ret = 0;
/* Starting with GMU HFI Version 2.6.1, use H2F_MSG_TABLE */
if (gmu->ver.hfi >= HFI_VERSION(2, 6, 1)) {
struct hfi_table_cmd *cmd = (struct hfi_table_cmd *)&cmd_buf[0];
u32 dword_off;
/* Already setup, so just send cmd */
if (cmd->hdr)
return gen8_hfi_send_generic_req(adreno_dev, cmd,
MSG_HDR_GET_SIZE(cmd->hdr) << 2);
if (tbl->gpu_level_num > MAX_GX_LEVELS || tbl->gmu_level_num > MAX_CX_LEVELS)
return -EINVAL;
/* CMD starts with struct hfi_table_cmd data */
cmd->type = HFI_TABLE_GPU_PERF;
dword_off = sizeof(*cmd) >> 2;
/* Fill in the table entry and data starting at dword_off */
dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
tbl->gpu_level_num, sizeof(struct opp_gx_desc),
(u32 *)tbl->gx_votes);
/* Fill in the table entry and data starting at dword_off */
dword_off += _fill_table_entry((struct hfi_table_entry *)&cmd_buf[dword_off],
tbl->gmu_level_num, sizeof(struct opp_desc),
(u32 *)tbl->cx_votes);
cmd->hdr = CREATE_MSG_HDR(H2F_MSG_TABLE, HFI_MSG_CMD);
cmd->hdr = MSG_HDR_SET_SIZE(cmd->hdr, dword_off);
ret = gen8_hfi_send_generic_req(adreno_dev, cmd, dword_off << 2);
} else {
struct hfi_dcvstable_cmd *cmd = (struct hfi_dcvstable_cmd *)&cmd_buf[0];
/* Already setup, so just send cmd */
if (cmd->hdr)
return gen8_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
if (tbl->gpu_level_num > MAX_GX_LEVELS_LEGACY || tbl->gmu_level_num > MAX_CX_LEVELS)
return -EINVAL;
ret = CMD_MSG_HDR(*cmd, H2F_MSG_PERF_TBL);
if (ret)
return ret;
cmd->gpu_level_num = tbl->gpu_level_num;
cmd->gmu_level_num = tbl->gmu_level_num;
memcpy(&cmd->gx_votes, tbl->gx_votes,
sizeof(struct opp_gx_desc) * cmd->gpu_level_num);
memcpy(&cmd->cx_votes, tbl->cx_votes,
sizeof(struct opp_desc) * cmd->gmu_level_num);
ret = gen8_hfi_send_generic_req(adreno_dev, cmd, sizeof(*cmd));
}
return ret;
}
int gen8_hfi_start(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int result;
reset_hfi_queues(adreno_dev);
result = gen8_hfi_send_gpu_perf_table(adreno_dev);
if (result)
goto err;
result = gen8_hfi_send_generic_req(adreno_dev, &gmu->hfi.bw_table,
sizeof(gmu->hfi.bw_table));
if (result)
goto err;
result = gen8_hfi_send_acd_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen8_hfi_send_bcl_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen8_hfi_send_clx_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen8_hfi_send_ifpc_feature_ctrl(adreno_dev);
if (result)
goto err;
result = gen8_hfi_send_core_fw_start(adreno_dev);
if (result)
goto err;
set_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
/* Request default DCVS level */
result = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
if (result)
goto err;
/* Request default BW vote */
result = kgsl_pwrctrl_axi(device, true);
err:
if (result)
gen8_hfi_stop(adreno_dev);
return result;
}
void gen8_hfi_stop(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
kgsl_pwrctrl_axi(device, false);
clear_bit(GMU_PRIV_HFI_STARTED, &gmu->flags);
}
/* HFI interrupt handler */
irqreturn_t gen8_hfi_irq_handler(int irq, void *data)
{
struct kgsl_device *device = data;
struct gen8_gmu_device *gmu = to_gen8_gmu(ADRENO_DEVICE(device));
u32 status = 0;
gmu_core_regread(device, GEN8_GMUCX_GMU2HOST_INTR_INFO, &status);
gmu_core_regwrite(device, GEN8_GMUCX_GMU2HOST_INTR_CLR, HFI_IRQ_MASK);
if (status & HFI_IRQ_DBGQ_MASK)
gen8_hfi_process_queue(gmu, HFI_DBG_ID, NULL);
if (status & HFI_IRQ_CM3_FAULT_MASK) {
dev_err_ratelimited(&gmu->pdev->dev,
"GMU CM3 fault interrupt received\n");
atomic_set(&gmu->cm3_fault, 1);
/* make sure other CPUs see the update */
smp_wmb();
}
if (status & ~HFI_IRQ_MASK)
dev_err_ratelimited(&gmu->pdev->dev,
"Unhandled HFI interrupts 0x%lx\n",
status & ~HFI_IRQ_MASK);
return IRQ_HANDLED;
}


@ -0,0 +1,235 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN8_HFI_H
#define __ADRENO_GEN8_HFI_H
#include "adreno_hfi.h"
/**
* struct gen8_hfi - HFI control structure
*/
struct gen8_hfi {
/** @irq: HFI interrupt line */
int irq;
/** @seqnum: atomic counter that is incremented for each message sent.
* The value of the counter is used as sequence number for HFI message.
*/
atomic_t seqnum;
/** @hfi_mem: Memory descriptor for the hfi memory */
struct kgsl_memdesc *hfi_mem;
/** @bw_table: HFI BW table buffer */
struct hfi_bwtable_cmd bw_table;
/** @acd_table: HFI table for ACD data */
struct hfi_acd_table_cmd acd_table;
/** @cmdq_lock: Spinlock for accessing the cmdq */
spinlock_t cmdq_lock;
/**
* @wb_set_record_bitmask: Bitmask to enable or disable the recording
* of messages in the GMU scratch.
*/
unsigned long wb_set_record_bitmask[BITS_TO_LONGS(HFI_MAX_ID)];
};
struct gen8_gmu_device;
/* gen8_hfi_irq_handler - IRQ handler for HFI interrupts */
irqreturn_t gen8_hfi_irq_handler(int irq, void *data);
/**
* gen8_hfi_start - Send the various HFIs during device boot up
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_start(struct adreno_device *adreno_dev);
/**
 * gen8_hfi_stop - Stop HFI communication with the GMU
 * @adreno_dev: Pointer to the adreno device
 *
 * Release the default BW vote and clear the HFI started flag
*/
void gen8_hfi_stop(struct adreno_device *adreno_dev);
/**
* gen8_hfi_init - Initialize hfi resources
* @adreno_dev: Pointer to the adreno device
*
* This function allocates and sets up hfi queues
* when a process creates the very first kgsl instance
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_init(struct adreno_device *adreno_dev);
/* Helper function to get to gen8 hfi struct from adreno device */
struct gen8_hfi *to_gen8_hfi(struct adreno_device *adreno_dev);
/**
* gen8_hfi_queue_write - Write a command to hfi queue
* @adreno_dev: Pointer to the adreno device
* @queue_idx: destination queue id
* @msg: Data to be written to the queue
* @size_bytes: Size of the command in bytes
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_queue_write(struct adreno_device *adreno_dev, u32 queue_idx,
u32 *msg, u32 size_bytes);
/**
* gen8_hfi_queue_read - Read data from hfi queue
* @gmu: Pointer to the gen8 gmu device
* @queue_idx: queue id to read from
* @output: Pointer to read the data into
 * @max_size: Size of the output buffer in bytes
 *
 * Return: Number of dwords read on success or negative error on failure
*/
int gen8_hfi_queue_read(struct gen8_gmu_device *gmu, u32 queue_idx,
u32 *output, u32 max_size);
/**
* gen8_receive_ack_cmd - Process ack type packets
* @gmu: Pointer to the gen8 gmu device
* @rcvd: Pointer to the data read from hfi queue
* @ret_cmd: Container for the hfi packet for which this ack is received
*
* Return: 0 on success or negative error on failure
*/
int gen8_receive_ack_cmd(struct gen8_gmu_device *gmu, void *rcvd,
struct pending_cmd *ret_cmd);
/**
* gen8_hfi_send_feature_ctrl - Enable gmu feature via hfi
* @adreno_dev: Pointer to the adreno device
* @feature: feature to be enabled or disabled
 * @enable: Set 1 to enable or 0 to disable a feature
* @data: payload for the send feature hfi packet
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_feature_ctrl(struct adreno_device *adreno_dev,
u32 feature, u32 enable, u32 data);
/**
* gen8_hfi_send_get_value - Send gmu get_values via hfi
* @adreno_dev: Pointer to the adreno device
* @type: GMU get_value type
* @subtype: GMU get_value subtype
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_get_value(struct adreno_device *adreno_dev, u32 type, u32 subtype);
/**
* gen8_hfi_send_set_value - Send gmu set_values via hfi
* @adreno_dev: Pointer to the adreno device
* @type: GMU set_value type
* @subtype: GMU set_value subtype
* @data: Value to set
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_set_value(struct adreno_device *adreno_dev,
u32 type, u32 subtype, u32 data);
/**
* gen8_hfi_send_core_fw_start - Send the core fw start hfi
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_core_fw_start(struct adreno_device *adreno_dev);
/**
* gen8_hfi_send_acd_feature_ctrl - Send the acd table and acd feature
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_acd_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen8_hfi_send_generic_req - Send a generic hfi packet
* @adreno_dev: Pointer to the adreno device
* @cmd: Pointer to the hfi packet header and data
* @size_bytes: Size of the packet in bytes
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_generic_req(struct adreno_device *adreno_dev, void *cmd, u32 size_bytes);
/**
* gen8_hfi_send_generic_req_v5 - Send a generic hfi packet with additional error handling
* @adreno_dev: Pointer to the adreno device
* @cmd: Pointer to the hfi packet header and data
* @ret_cmd: Ack for the command we just sent
* @size_bytes: Size of the packet in bytes
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_generic_req_v5(struct adreno_device *adreno_dev, void *cmd,
struct pending_cmd *ret_cmd, u32 size_bytes);
/**
* gen8_hfi_send_bcl_feature_ctrl - Send the bcl feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_bcl_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen8_hfi_send_clx_feature_ctrl - Send the clx feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_clx_feature_ctrl(struct adreno_device *adreno_dev);
/**
 * gen8_hfi_send_ifpc_feature_ctrl - Send the ifpc feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_ifpc_feature_ctrl(struct adreno_device *adreno_dev);
/**
* gen8_hfi_send_gpu_perf_table - Send the gpu perf table hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_gpu_perf_table(struct adreno_device *adreno_dev);
/*
* gen8_hfi_process_queue - Check hfi queue for messages from gmu
* @gmu: Pointer to the gen8 gmu device
* @queue_idx: queue id to be processed
* @ret_cmd: Container for data needed for waiting for the ack
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_process_queue(struct gen8_gmu_device *gmu,
u32 queue_idx, struct pending_cmd *ret_cmd);
/**
* gen8_hfi_cmdq_write - Write a command to command queue
* @adreno_dev: Pointer to the adreno device
* @msg: Data to be written to the queue
* @size_bytes: Size of the command in bytes
*
* This function takes the cmdq lock before writing data to the queue
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_cmdq_write(struct adreno_device *adreno_dev, u32 *msg, u32 size_bytes);
void adreno_gen8_receive_err_req(struct gen8_gmu_device *gmu, void *rcvd);
void adreno_gen8_receive_debug_req(struct gen8_gmu_device *gmu, void *rcvd);
#endif

File diff suppressed because it is too large


@ -0,0 +1,106 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_GEN8_HWSCHED_H_
#define _ADRENO_GEN8_HWSCHED_H_
#include "adreno_gen8_hwsched_hfi.h"
/**
* struct gen8_hwsched_device - Container for the gen8 hwscheduling device
*/
struct gen8_hwsched_device {
/** @gen8_dev: Container for the gen8 device */
struct gen8_device gen8_dev;
/** @hwsched_hfi: Container for hwscheduling specific hfi resources */
struct gen8_hwsched_hfi hwsched_hfi;
};
/**
* gen8_hwsched_probe - Target specific probe for hwsched
* @pdev: Pointer to the platform device
* @chipid: Chipid of the target
* @gpucore: Pointer to the gpucore
*
* The target specific probe function for hwsched enabled gmu targets.
*
* Return: 0 on success or negative error on failure
*/
int gen8_hwsched_probe(struct platform_device *pdev,
u32 chipid, const struct adreno_gpu_core *gpucore);
/**
* gen8_hwsched_reset_replay - Restart the gmu and gpu and replay inflight cmdbatches
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hwsched_reset_replay(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_snapshot - take gen8 hwsched snapshot
* @adreno_dev: Pointer to the adreno device
* @snapshot: Pointer to the snapshot instance
*
* Snapshot the faulty ib and then snapshot rest of gen8 gmu things
*/
void gen8_hwsched_snapshot(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
/**
* gen8_hwsched_handle_watchdog - Handle watchdog interrupt
* @adreno_dev: Pointer to the adreno device
*/
void gen8_hwsched_handle_watchdog(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_active_count_get - Increment the active count
* @adreno_dev: Pointer to the adreno device
*
* This function increments the active count. If active count
* is 0, this function also powers up the device.
*
* Return: 0 on success or negative error on failure
*/
int gen8_hwsched_active_count_get(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_active_count_put - Put back the active count
* @adreno_dev: Pointer to the adreno device
*
 * This function decrements the active count and sets the idle
 * timer if the active count reaches zero.
*/
void gen8_hwsched_active_count_put(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_add_to_minidump - Register hwsched_device with va minidump
* @adreno_dev: Pointer to the adreno device
*/
int gen8_hwsched_add_to_minidump(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_send_recurring_cmdobj - Dispatch IBs to GMU
* @adreno_dev: Pointer to adreno device structure
* @cmdobj: The command object which needs to be submitted
*
* This function is used to register the context if needed and submit
* recurring IBs to the GMU. Upon receiving ipc interrupt GMU will submit
* recurring IBs to GPU.
* Return: 0 on success and negative error on failure
*/
int gen8_hwsched_send_recurring_cmdobj(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj);
/**
* gen8_hwsched_fault - Set hwsched fault to request recovery
* @adreno_dev: A handle to adreno device
* @fault: The type of fault
*/
void gen8_hwsched_fault(struct adreno_device *adreno_dev, u32 fault);
#endif

File diff suppressed because it is too large


@ -0,0 +1,359 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_GEN8_HWSCHED_HFI_H_
#define _ADRENO_GEN8_HWSCHED_HFI_H_
/* Maximum number of IBs in a submission */
#define HWSCHED_MAX_NUMIBS \
((HFI_MAX_MSG_SIZE - offsetof(struct hfi_issue_cmd_cmd, ibs)) \
/ sizeof(struct hfi_issue_ib))
/*
 * This is used to put userspace threads to sleep when the hardware fence unack count reaches a
 * threshold. This bit is cleared in two scenarios:
 * 1. If the hardware fence unack count drops to a desired threshold.
 * 2. If there is a GMU/GPU fault, because we don't want the threads to keep sleeping through fault
 *    recovery, which can easily take hundreds of milliseconds to complete.
 */
#define GEN8_HWSCHED_HW_FENCE_SLEEP_BIT 0x0
/*
 * This is used to avoid creating any more hardware fences until the hardware fence unack count
 * drops to a desired threshold. This bit is required in cases where GEN8_HWSCHED_HW_FENCE_SLEEP_BIT
 * will be cleared, but we still want to avoid creating any more hardware fences. For example, if
 * the hardware fence unack count reaches a maximum threshold, both GEN8_HWSCHED_HW_FENCE_SLEEP_BIT
 * and GEN8_HWSCHED_HW_FENCE_MAX_BIT will be set. If a GMU/GPU fault then happens,
 * GEN8_HWSCHED_HW_FENCE_SLEEP_BIT will be cleared to wake up any sleeping threads, but
 * GEN8_HWSCHED_HW_FENCE_MAX_BIT will remain set to avoid creating any new hardware fences until
 * recovery is complete and the deferred drawctxt (if any) is handled.
 */
#define GEN8_HWSCHED_HW_FENCE_MAX_BIT 0x1
/*
* This is used to avoid creating any more hardware fences until concurrent reset/recovery completes
*/
#define GEN8_HWSCHED_HW_FENCE_ABORT_BIT 0x2
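/*
 * Illustrative sketch only (not the driver's actual code path): a hardware
 * fence creation helper is expected to consult these bits roughly as follows,
 * where hfi->hw_fence.flags is the flags field defined below:
 *
 *	if (test_bit(GEN8_HWSCHED_HW_FENCE_ABORT_BIT, &hfi->hw_fence.flags) ||
 *	    test_bit(GEN8_HWSCHED_HW_FENCE_MAX_BIT, &hfi->hw_fence.flags))
 *		defer the fence until recovery/throttling is resolved;
 *	else if (test_bit(GEN8_HWSCHED_HW_FENCE_SLEEP_BIT, &hfi->hw_fence.flags))
 *		wait on hw_fence.unack_wq before sending the fence to the GMU;
 */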
struct gen8_hwsched_hfi {
struct hfi_mem_alloc_entry mem_alloc_table[32];
u32 mem_alloc_entries;
/** @irq_mask: Store the hfi interrupt mask */
u32 irq_mask;
/** @msglock: To protect the list of un-ACKed hfi packets */
rwlock_t msglock;
/** @msglist: List of un-ACKed hfi packets */
struct list_head msglist;
/** @f2h_task: Task for processing gmu fw to host packets */
struct task_struct *f2h_task;
/** @f2h_wq: Waitqueue for the f2h_task */
wait_queue_head_t f2h_wq;
/** @big_ib: GMU buffer to hold big IBs */
struct kgsl_memdesc *big_ib;
/** @big_ib_recurring: GMU buffer to hold big recurring IBs */
struct kgsl_memdesc *big_ib_recurring;
/** @msgq_mutex: Mutex for accessing the msgq */
struct mutex msgq_mutex;
struct {
/** @lock: Spinlock for managing hardware fences */
spinlock_t lock;
/**
 * @unack_count: Number of hardware fences sent to the GMU that haven't yet been ack'd
 * by the GMU
 */
u32 unack_count;
/**
* @unack_wq: Waitqueue to wait on till number of unacked hardware fences drops to
* a desired threshold
*/
wait_queue_head_t unack_wq;
/**
 * @defer_drawctxt: Drawctxt to send hardware fences from as soon as the unacked
 * hardware fence count drops to a desired threshold
 */
struct adreno_context *defer_drawctxt;
/**
* @defer_ts: The timestamp of the hardware fence which got deferred
*/
u32 defer_ts;
/**
* @flags: Flags to control the creation of new hardware fences
*/
unsigned long flags;
/** @seqnum: Sequence number for hardware fence packet header */
atomic_t seqnum;
} hw_fence;
/**
 * @hw_fence_timer: Timer to trigger a fault if the unack'd hardware fence count doesn't drop
 * to a desired threshold in a given amount of time
 */
struct timer_list hw_fence_timer;
/**
* @hw_fence_ws: Work struct that gets scheduled when hw_fence_timer expires
*/
struct work_struct hw_fence_ws;
/** @detached_hw_fence_list: List of hardware fences belonging to detached contexts */
struct list_head detached_hw_fence_list;
/** @defer_hw_fence_work: The work structure to send deferred hardware fences to GMU */
struct kthread_work defer_hw_fence_work;
};
struct kgsl_drawobj_cmd;
/**
* gen8_hwsched_hfi_probe - Probe hwsched hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* Return: 0 on success and negative error on failure.
*/
int gen8_hwsched_hfi_probe(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_hfi_remove - Release hwsched hfi resources
* @adreno_dev: Pointer to adreno device structure
*/
void gen8_hwsched_hfi_remove(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_hfi_init - Initialize hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to initialize hfi resources
* once before the very first gmu boot
*
* Return: 0 on success and negative error on failure.
*/
int gen8_hwsched_hfi_init(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_hfi_start - Start hfi resources
* @adreno_dev: Pointer to adreno device structure
*
* Send the various hfi packets before booting the gpu
*
* Return: 0 on success and negative error on failure.
*/
int gen8_hwsched_hfi_start(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_hfi_stop - Stop the hfi resources
* @adreno_dev: Pointer to the adreno device
*
* This function does the hfi cleanup when powering down the gmu
*/
void gen8_hwsched_hfi_stop(struct adreno_device *adreno_dev);
/**
 * gen8_hwsched_cp_init - Send CP_INIT via HFI
 * @adreno_dev: Pointer to adreno device structure
 *
 * This function is used to send the CP INIT packet and bring
 * the GPU out of secure mode using hfi raw packets.
 *
 * Return: 0 on success and negative error on failure.
 */
int gen8_hwsched_cp_init(struct adreno_device *adreno_dev);
/**
 * gen8_hfi_send_cmd_async - Send an hfi packet
 * @adreno_dev: Pointer to adreno device structure
 * @data: Data to be sent in the hfi packet
 * @size_bytes: Size of the packet in bytes
 *
 * Send data in the form of an HFI packet to the GMU and wait for
 * its ack asynchronously
 *
 * Return: 0 on success and negative error on failure.
 */
int gen8_hfi_send_cmd_async(struct adreno_device *adreno_dev, void *data, u32 size_bytes);
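/*
 * Illustrative usage only (hypothetical packet type): callers typically build
 * the HFI packet on the stack and pass its address and size, e.g.
 *
 *	struct hfi_some_cmd cmd = { ... };
 *
 *	ret = gen8_hfi_send_cmd_async(adreno_dev, &cmd, sizeof(cmd));
 */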
/**
* gen8_hwsched_submit_drawobj - Dispatch IBs to dispatch queues
* @adreno_dev: Pointer to adreno device structure
* @drawobj: The command draw object which needs to be submitted
*
* This function is used to register the context if needed and submit
* IBs to the hfi dispatch queues.
* Return: 0 on success and negative error on failure
*/
int gen8_hwsched_submit_drawobj(struct adreno_device *adreno_dev,
struct kgsl_drawobj *drawobj);
/**
* gen8_hwsched_context_detach - Unregister a context with GMU
* @drawctxt: Pointer to the adreno context
*
* This function sends context unregister HFI and waits for the ack
* to ensure all submissions from this context have retired
*/
void gen8_hwsched_context_detach(struct adreno_context *drawctxt);
/* Helper function to get to gen8 hwsched hfi device from adreno device */
struct gen8_hwsched_hfi *to_gen8_hwsched_hfi(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_preempt_count_get - Get preemption count from GMU
* @adreno_dev: Pointer to adreno device
*
* This function sends a GET_VALUE HFI packet to get the number of
* preemptions completed since last SLUMBER exit.
*
* Return: Preemption count
*/
u32 gen8_hwsched_preempt_count_get(struct adreno_device *adreno_dev);
/**
 * gen8_hwsched_parse_payload - Parse payload to look up a key
 * @payload: Pointer to a payload section
 * @key: The key whose value is to be looked up
 *
 * This function parses the payload data, which is a sequence
 * of key-value pairs.
 *
 * Return: The value of the key, or 0 if the key is not found
 */
u32 gen8_hwsched_parse_payload(struct payload_section *payload, u32 key);
/**
* gen8_hwsched_lpac_cp_init - Send CP_INIT to LPAC via HFI
* @adreno_dev: Pointer to adreno device structure
*
* This function is used to send CP INIT packet to LPAC and
* enable submission to LPAC queue.
*
* Return: 0 on success and negative error on failure.
*/
int gen8_hwsched_lpac_cp_init(struct adreno_device *adreno_dev);
/**
* gen8_hfi_send_lpac_feature_ctrl - Send the lpac feature hfi packet
* @adreno_dev: Pointer to the adreno device
*
* Return: 0 on success or negative error on failure
*/
int gen8_hfi_send_lpac_feature_ctrl(struct adreno_device *adreno_dev);
/**
 * gen8_hwsched_context_destroy - Destroy any hwsched related resources during context destruction
 * @adreno_dev: Pointer to adreno device
 * @drawctxt: Pointer to the adreno context
 *
 * This function destroys any hwsched related resources when this context is destroyed
 */
void gen8_hwsched_context_destroy(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt);
/**
 * gen8_hwsched_hfi_get_value - Send GET_VALUE packet to GMU to get the value of a property
 * @adreno_dev: Pointer to adreno device
 * @prop: Property to get from GMU
 *
 * This function sends a GET_VALUE HFI packet to query the value of a property
 *
 * Return: On success, return the value in the GMU response. On failure, return 0
 */
u32 gen8_hwsched_hfi_get_value(struct adreno_device *adreno_dev, u32 prop);
/**
* gen8_send_hw_fence_hfi_wait_ack - Send hardware fence info to GMU
* @adreno_dev: Pointer to adreno device
* @entry: Pointer to the adreno hardware fence entry
* @flags: Flags for this hardware fence
*
* Send the hardware fence info to the GMU and wait for the ack
*
* Return: 0 on success or negative error on failure
*/
int gen8_send_hw_fence_hfi_wait_ack(struct adreno_device *adreno_dev,
struct adreno_hw_fence_entry *entry, u64 flags);
/**
* gen8_hwsched_create_hw_fence - Create a hardware fence
* @adreno_dev: Pointer to adreno device
* @kfence: Pointer to the kgsl fence
*
* Create a hardware fence, set up hardware fence info and send it to GMU if required
*/
void gen8_hwsched_create_hw_fence(struct adreno_device *adreno_dev,
struct kgsl_sync_fence *kfence);
/**
* gen8_hwsched_drain_context_hw_fences - Drain context's hardware fences via GMU
* @adreno_dev: Pointer to adreno device
* @drawctxt: Pointer to the adreno context which is to be flushed
*
* Trigger hardware fences that were never dispatched to GMU
*
* Return: Zero on success or negative error on failure
*/
int gen8_hwsched_drain_context_hw_fences(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt);
/**
* gen8_hwsched_check_context_inflight_hw_fences - Check whether all hardware fences
* from a context have been sent to the TxQueue or not
* @adreno_dev: Pointer to adreno device
* @drawctxt: Pointer to the adreno context which is to be flushed
*
* Check if all hardware fences from this context have been sent to the
* TxQueue. If not, log an error and return error code.
*
* Return: Zero on success or negative error on failure
*/
int gen8_hwsched_check_context_inflight_hw_fences(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt);
/**
* gen8_remove_hw_fence_entry - Remove hardware fence entry
* @adreno_dev: pointer to the adreno device
* @entry: Pointer to the hardware fence entry
*/
void gen8_remove_hw_fence_entry(struct adreno_device *adreno_dev,
struct adreno_hw_fence_entry *entry);
/**
* gen8_trigger_hw_fence_cpu - Trigger hardware fence from cpu
* @adreno_dev: pointer to the adreno device
* @fence: hardware fence entry to be triggered
*
* Trigger the hardware fence by sending it to GMU's TxQueue and raise the
* interrupt from GMU to APPS
*/
void gen8_trigger_hw_fence_cpu(struct adreno_device *adreno_dev,
struct adreno_hw_fence_entry *fence);
/**
* gen8_hwsched_disable_hw_fence_throttle - Disable hardware fence throttling after reset
* @adreno_dev: pointer to the adreno device
*
* After device reset, clear hardware fence related data structures and send any hardware fences
* that got deferred (prior to reset) and re-open the gates for hardware fence creation
*
* Return: Zero on success or negative error on failure
*/
int gen8_hwsched_disable_hw_fence_throttle(struct adreno_device *adreno_dev);
/**
* gen8_hwsched_process_msgq - Process msgq
* @adreno_dev: pointer to the adreno device
*
* This function grabs the msgq mutex and processes msgq for any outstanding hfi packets
*/
void gen8_hwsched_process_msgq(struct adreno_device *adreno_dev);
/**
 * gen8_hwsched_boot_gpu - Send the command to boot GPU
 * @adreno_dev: Pointer to adreno device
 *
 * Send the hfi to boot the GPU and check the ack. In case of a failure,
 * take a snapshot and capture registers of interest.
 *
 * Return: Zero on success or negative error on failure
 */
int gen8_hwsched_boot_gpu(struct adreno_device *adreno_dev);
#endif

File diff suppressed because it is too large

@ -0,0 +1,807 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_pm4types.h"
#include "adreno_trace.h"
#define PREEMPT_RECORD(_field) \
offsetof(struct gen8_cp_preemption_record, _field)
#define PREEMPT_SMMU_RECORD(_field) \
offsetof(struct gen8_cp_smmu_info, _field)
static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer,
bool atomic)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&rb->preempt_lock, flags);
if (!atomic) {
/*
* We might have skipped updating the wptr in case we are in
* dispatcher context. Do it now.
*/
if (rb->skip_inline_wptr) {
ret = gen8_fenced_write(adreno_dev,
GEN8_CP_RB_WPTR_GC, rb->wptr,
FENCE_STATUS_WRITEDROPPED0_MASK);
reset_timer = true;
rb->skip_inline_wptr = false;
}
} else {
u32 wptr;
kgsl_regread(device, GEN8_CP_RB_WPTR_GC, &wptr);
if (wptr != rb->wptr) {
kgsl_regwrite(device, GEN8_CP_RB_WPTR_GC, rb->wptr);
reset_timer = true;
}
}
if (reset_timer)
rb->dispatch_q.expires = jiffies +
msecs_to_jiffies(adreno_drawobj_timeout);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!atomic) {
/* If WPTR update fails, set the fault and trigger recovery */
if (ret) {
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
}
}
}
static void _power_collapse_set(struct adreno_device *adreno_dev, bool val)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
gmu_core_regwrite(device,
GEN8_GMUCX_PWR_COL_PREEMPTION_KEEPALIVE, (val ? 1 : 0));
}
static void _gen8_preemption_done(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 status;
/*
* In the very unlikely case that the power is off, do nothing - the
* state will be reset on power up and everybody will be happy
*/
if (!kgsl_state_is_awake(device))
return;
kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status);
if (status & 0x1) {
dev_err(device->dev,
"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
status, adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
/* Set a fault and restart */
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
return;
}
adreno_dev->preempt.count++;
del_timer_sync(&adreno_dev->preempt.timer);
kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
status, 0);
/* Clean up all the bits */
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr for the new command queue */
_update_wptr(adreno_dev, true, false);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
/* Clear the preempt state */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
}
static void _gen8_preemption_fault(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 status;
/*
* If the power is on check the preemption status one more time - if it
* was successful then just transition to the complete state
*/
if (kgsl_state_is_awake(device)) {
kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status);
if (!(status & 0x1)) {
adreno_set_preempt_state(adreno_dev,
ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(device);
return;
}
}
dev_err(device->dev,
"Preemption Fault: cur=%d R/W=0x%x/0x%x, next=%d R/W=0x%x/0x%x\n",
adreno_dev->cur_rb->id,
adreno_get_rptr(adreno_dev->cur_rb),
adreno_dev->cur_rb->wptr,
adreno_dev->next_rb->id,
adreno_get_rptr(adreno_dev->next_rb),
adreno_dev->next_rb->wptr);
adreno_dispatcher_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
}
static void _gen8_preemption_worker(struct work_struct *work)
{
struct adreno_preemption *preempt = container_of(work,
struct adreno_preemption, work);
struct adreno_device *adreno_dev = container_of(preempt,
struct adreno_device, preempt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
/* Need to take the mutex to make sure that the power stays on */
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
_gen8_preemption_fault(adreno_dev);
mutex_unlock(&device->mutex);
}
/* Find the highest priority active ringbuffer */
static struct adreno_ringbuffer *gen8_next_ringbuffer(
struct adreno_device *adreno_dev)
{
struct adreno_ringbuffer *rb;
unsigned long flags;
u32 i;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
bool empty;
spin_lock_irqsave(&rb->preempt_lock, flags);
empty = adreno_rb_empty(rb);
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (!empty)
return rb;
}
return NULL;
}
void gen8_preemption_trigger(struct adreno_device *adreno_dev, bool atomic)
{
const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *next;
u64 ttbr0, gpuaddr;
u32 contextidr, cntl;
unsigned long flags;
struct adreno_preemption *preempt = &adreno_dev->preempt;
/* Put ourselves into a possible trigger state */
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
return;
/* Get the next ringbuffer to preempt in */
next = gen8_next_ringbuffer(adreno_dev);
/*
* Nothing to do if every ringbuffer is empty or if the current
* ringbuffer is the only active one
*/
if (next == NULL || next == adreno_dev->cur_rb) {
/*
* Update any critical things that might have been skipped while
* we were looking for a new ringbuffer
*/
if (next != NULL) {
_update_wptr(adreno_dev, false, atomic);
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
return;
}
/* Turn off the dispatcher timer */
del_timer(&adreno_dev->dispatcher.timer);
/*
* This is the most critical section - we need to take care not to race
* until we have programmed the CP for the switch
*/
spin_lock_irqsave(&next->preempt_lock, flags);
/* Get the pagetable from the pagetable info. */
kgsl_sharedmem_readq(device->scratch, &ttbr0,
SCRATCH_RB_OFFSET(next->id, ttbr0));
kgsl_sharedmem_readl(device->scratch, &contextidr,
SCRATCH_RB_OFFSET(next->id, contextidr));
kgsl_sharedmem_writel(next->preemption_desc,
PREEMPT_RECORD(wptr), next->wptr);
spin_unlock_irqrestore(&next->preempt_lock, flags);
/* And write it to the smmu info */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), contextidr);
}
kgsl_sharedmem_readq(preempt->scratch, &gpuaddr,
next->id * sizeof(u64));
/*
* Set a keepalive bit before the first preemption register write.
* This is required since while each individual write to the context
* switch registers will wake the GPU from collapse, it will not in
* itself cause GPU activity. Thus, the GPU could technically be
* re-collapsed between subsequent register writes leading to a
* prolonged preemption sequence. The keepalive bit prevents any
* further power collapse while it is set.
* It is more efficient to use a keepalive+wake-on-fence approach here
* rather than an OOB. Both keepalive and the fence are effectively
* free when the GPU is already powered on, whereas an OOB requires an
* unconditional handshake with the GMU.
*/
_power_collapse_set(adreno_dev, true);
/*
* Fenced writes on this path will make sure the GPU is woken up
* in case it was power collapsed by the GMU.
*/
if (gen8_fenced_write(adreno_dev,
GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_LO,
lower_32_bits(next->preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
/*
 * The above fenced writes make sure the GMU comes out of
 * IFPC if it was in that state, but they don't guarantee
 * that the GMU firmware has actually moved to the ACTIVE
 * state, i.e. that wake-up from IFPC is complete. Wait for
 * the GMU to move to the ACTIVE state before triggering
 * preemption. This is required to make sure the CP doesn't
 * interrupt the GMU during wake-up from IFPC.
 */
if (!atomic && gmu_core_dev_wait_for_active_transition(device))
goto err;
if (gen8_fenced_write(adreno_dev,
GEN8_CP_CONTEXT_SWITCH_PNSR_ADDR_HI,
upper_32_bits(next->preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen8_fenced_write(adreno_dev,
GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_LO,
lower_32_bits(next->secure_preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen8_fenced_write(adreno_dev,
GEN8_CP_CONTEXT_SWITCH_PSR_ADDR_HI,
upper_32_bits(next->secure_preemption_desc->gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen8_fenced_write(adreno_dev,
GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_LO,
lower_32_bits(gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
if (gen8_fenced_write(adreno_dev,
GEN8_CP_CONTEXT_SWITCH_NPR_ADDR_HI,
upper_32_bits(gpuaddr),
FENCE_STATUS_WRITEDROPPED1_MASK))
goto err;
adreno_dev->next_rb = next;
/* Start the timer to detect a stuck preemption */
mod_timer(&adreno_dev->preempt.timer,
jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
cntl = (preempt->preempt_level << 6) | 0x01;
/* Skip save/restore during L1 preemption */
if (preempt->skipsaverestore)
cntl |= (1 << 9);
/* Enable GMEM save/restore across preemption */
if (preempt->usesgmem)
cntl |= (1 << 8);
trace_adreno_preempt_trigger(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
cntl, 0);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
if (gen8_core->qos_value)
kgsl_sharedmem_writel(preempt->scratch,
PREEMPT_SCRATCH_OFFSET(QOS_VALUE_IDX),
gen8_core->qos_value[next->id]);
/* Trigger the preemption */
if (gen8_fenced_write(adreno_dev, GEN8_CP_CONTEXT_SWITCH_CNTL, cntl,
FENCE_STATUS_WRITEDROPPED1_MASK)) {
adreno_dev->next_rb = NULL;
del_timer(&adreno_dev->preempt.timer);
goto err;
}
return;
err:
/* If fenced write fails, take inline snapshot and trigger recovery */
if (!in_interrupt()) {
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
} else {
adreno_dispatcher_fault(adreno_dev, ADRENO_GMU_FAULT);
}
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
/* Clear the keep alive */
_power_collapse_set(adreno_dev, false);
}
void gen8_preemption_callback(struct adreno_device *adreno_dev, int bit)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 status;
if (!adreno_move_preempt_state(adreno_dev,
ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
return;
kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_CNTL, &status);
if (status & 0x1) {
dev_err(KGSL_DEVICE(adreno_dev)->dev,
"preempt interrupt with non-zero status: %X\n",
status);
/*
* Under the assumption that this is a race between the
* interrupt and the register, schedule the worker to clean up.
* If the status still hasn't resolved itself by the time we get
* there then we have to assume something bad happened
*/
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
return;
}
adreno_dev->preempt.count++;
/*
* We can now safely clear the preemption keepalive bit, allowing
* power collapse to resume its regular activity.
*/
_power_collapse_set(adreno_dev, false);
del_timer(&adreno_dev->preempt.timer);
kgsl_regread(device, GEN8_CP_CONTEXT_SWITCH_LEVEL_STATUS, &status);
trace_adreno_preempt_done(adreno_dev->cur_rb->id, adreno_dev->next_rb->id,
status, 0);
adreno_dev->prev_rb = adreno_dev->cur_rb;
adreno_dev->cur_rb = adreno_dev->next_rb;
adreno_dev->next_rb = NULL;
/* Update the wptr if it changed while preemption was ongoing */
_update_wptr(adreno_dev, true, true);
/* Update the dispatcher timer for the new command queue */
mod_timer(&adreno_dev->dispatcher.timer,
adreno_dev->cur_rb->dispatch_q.expires);
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
gen8_preemption_trigger(adreno_dev, true);
}
void gen8_preemption_prepare_postamble(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
struct adreno_preemption *preempt = &adreno_dev->preempt;
u32 *postamble, count = 0;
/*
* First 28 dwords of the device scratch buffer are used to store shadow rb data.
* Reserve 15 dwords in the device scratch buffer from SCRATCH_POSTAMBLE_OFFSET for
* KMD postamble pm4 packets. This should be in *device->scratch* so that userspace
* cannot access it.
*/
postamble = device->scratch->hostptr + SCRATCH_POSTAMBLE_OFFSET;
/*
* Reserve 4 dwords in the scratch buffer for dynamic QOS control feature. To ensure QOS
* value is updated for first preemption, send it during bootup
*/
if (gen8_core->qos_value) {
postamble[count++] = cp_type7_packet(CP_MEM_TO_REG, 3);
postamble[count++] = GEN8_RBBM_GBIF_CLIENT_QOS_CNTL;
postamble[count++] = lower_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
postamble[count++] = upper_32_bits(PREEMPT_SCRATCH_ADDR(adreno_dev, QOS_VALUE_IDX));
}
/*
* Since postambles are not preserved across slumber, necessary packets
* must be sent to GPU before first submission.
*
* If a packet needs to be sent before first submission, add it above this.
*/
preempt->postamble_bootup_len = count;
/* Reserve 15 dwords in the device scratch buffer to clear perfcounters */
if (!adreno_dev->perfcounter) {
postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_CMD;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = cp_type7_packet(CP_REG_RMW, 3);
postamble[count++] = GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
postamble[count++] = 0x3;
postamble[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS;
postamble[count++] = 0x0;
postamble[count++] = 0x1;
postamble[count++] = 0x1;
postamble[count++] = 0x0;
}
preempt->postamble_len = count;
}
void gen8_preemption_schedule(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
if (!adreno_is_preemption_enabled(adreno_dev))
return;
mutex_lock(&device->mutex);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
_gen8_preemption_done(adreno_dev);
gen8_preemption_trigger(adreno_dev, false);
mutex_unlock(&device->mutex);
}
u32 gen8_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 *cmds)
{
u32 *cmds_orig = cmds;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (test_and_set_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags))
goto done;
*cmds++ = cp_type7_packet(CP_THREAD_CONTROL, 1);
*cmds++ = CP_SET_THREAD_BR;
*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
/* NULL SMMU_INFO buffer - we track in KMD */
*cmds++ = SET_PSEUDO_SMMU_INFO;
cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
*cmds++ = SET_PSEUDO_PRIV_NON_SECURE_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc->gpuaddr);
*cmds++ = SET_PSEUDO_PRIV_SECURE_SAVE_ADDR;
cmds += cp_gpuaddr(adreno_dev, cmds,
rb->secure_preemption_desc->gpuaddr);
/*
 * There is no need to specify this address again when we are about to
 * trigger preemption. The CP internally copies the address specified
 * here in the CP_SET_PSEUDO_REGISTER payload into the context record,
 * and thus knows from where to restore the saved perfcounters for the
 * new ringbuffer.
 */
*cmds++ = SET_PSEUDO_COUNTER;
cmds += cp_gpuaddr(adreno_dev, cmds,
rb->perfcounter_save_restore_desc->gpuaddr);
done:
if (drawctxt) {
struct adreno_ringbuffer *rb = drawctxt->rb;
u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, rb->id);
u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
cmds += cp_gpuaddr(adreno_dev, cmds, dest);
*cmds++ = lower_32_bits(gpuaddr);
*cmds++ = upper_32_bits(gpuaddr);
if (adreno_dev->preempt.postamble_len) {
u64 kmd_postamble_addr = SCRATCH_POSTAMBLE_ADDR(KGSL_DEVICE(adreno_dev));
*cmds++ = cp_type7_packet(CP_SET_AMBLE, 3);
*cmds++ = lower_32_bits(kmd_postamble_addr);
*cmds++ = upper_32_bits(kmd_postamble_addr);
*cmds++ = FIELD_PREP(GENMASK(22, 20), CP_KMD_AMBLE_TYPE)
| (FIELD_PREP(GENMASK(19, 0), adreno_dev->preempt.postamble_len));
}
}
return (u32) (cmds - cmds_orig);
}
u32 gen8_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
u32 *cmds)
{
u32 index = 0;
if (!adreno_is_preemption_enabled(adreno_dev))
return 0;
if (adreno_dev->cur_rb) {
u64 dest = PREEMPT_SCRATCH_ADDR(adreno_dev, adreno_dev->cur_rb->id);
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 4);
cmds[index++] = lower_32_bits(dest);
cmds[index++] = upper_32_bits(dest);
cmds[index++] = 0;
cmds[index++] = 0;
}
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
cmds[index++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
cmds[index++] = 0;
cmds[index++] = 0;
cmds[index++] = 1;
cmds[index++] = 0;
return index;
}
void gen8_preemption_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_ringbuffer *rb;
u32 i;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
/* Force the state to be clear */
adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
if (kgsl_mmu_is_perprocess(&device->mmu)) {
/* smmu_info is allocated and mapped in gen8_preemption_iommu_init */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(magic), GEN8_CP_SMMU_INFO_MAGIC_REF);
kgsl_sharedmem_writeq(iommu->smmu_info,
PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
/* The CP doesn't use the asid record, so poison it */
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(asid), 0xdecafbad);
kgsl_sharedmem_writel(iommu->smmu_info,
PREEMPT_SMMU_RECORD(context_idr), 0);
kgsl_regwrite(device, GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
lower_32_bits(iommu->smmu_info->gpuaddr));
kgsl_regwrite(device, GEN8_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
upper_32_bits(iommu->smmu_info->gpuaddr));
}
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(rptr), 0);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(wptr), 0);
adreno_ringbuffer_set_pagetable(device, rb,
device->mmu.defaultpagetable);
clear_bit(ADRENO_RB_SET_PSEUDO_DONE, &rb->flags);
}
}
static void reset_rb_preempt_record(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
memset(rb->preemption_desc->hostptr, 0x0, rb->preemption_desc->size);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(magic), GEN8_CP_CTXRECORD_MAGIC_REF);
kgsl_sharedmem_writel(rb->preemption_desc,
PREEMPT_RECORD(cntl), GEN8_CP_RB_CNTL_DEFAULT);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rptr_addr), SCRATCH_RB_GPU_ADDR(
KGSL_DEVICE(adreno_dev), rb->id, rptr));
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(rbase), rb->buffer_desc->gpuaddr);
kgsl_sharedmem_writeq(rb->preemption_desc,
PREEMPT_RECORD(bv_rptr_addr), SCRATCH_RB_GPU_ADDR(
KGSL_DEVICE(adreno_dev), rb->id, bv_rptr));
}
void gen8_reset_preempt_records(struct adreno_device *adreno_dev)
{
int i;
struct adreno_ringbuffer *rb;
if (!adreno_is_preemption_enabled(adreno_dev))
return;
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
reset_rb_preempt_record(adreno_dev, rb);
}
}
static int gen8_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
u64 ctxt_record_size = GEN8_CP_CTXRECORD_SIZE_IN_BYTES;
int ret;
if (gen8_core->ctxt_record_size)
ctxt_record_size = gen8_core->ctxt_record_size;
ret = adreno_allocate_global(device, &rb->preemption_desc,
ctxt_record_size, SZ_16K, 0,
KGSL_MEMDESC_PRIVILEGED, "preemption_desc");
if (ret)
return ret;
ret = adreno_allocate_global(device, &rb->secure_preemption_desc,
ctxt_record_size, 0,
KGSL_MEMFLAGS_SECURE, KGSL_MEMDESC_PRIVILEGED,
"secure_preemption_desc");
if (ret)
return ret;
ret = adreno_allocate_global(device, &rb->perfcounter_save_restore_desc,
GEN8_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0, 0,
KGSL_MEMDESC_PRIVILEGED,
"perfcounter_save_restore_desc");
if (ret)
return ret;
reset_rb_preempt_record(adreno_dev, rb);
return 0;
}
int gen8_preemption_init(struct adreno_device *adreno_dev)
{
u32 flags = ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ? KGSL_MEMDESC_PRIVILEGED : 0;
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
struct adreno_preemption *preempt = &adreno_dev->preempt;
struct adreno_ringbuffer *rb;
int ret;
u32 i;
/* We are dependent on IOMMU to make preemption go on the CP side */
if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU) {
ret = -ENODEV;
goto done;
}
INIT_WORK(&preempt->work, _gen8_preemption_worker);
/* Allocate mem for storing preemption switch record */
FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
ret = gen8_preemption_ringbuffer_init(adreno_dev, rb);
if (ret)
goto done;
}
ret = adreno_allocate_global(device, &preempt->scratch, PAGE_SIZE,
0, 0, flags, "preempt_scratch");
if (ret)
goto done;
/* Allocate mem for storing preemption smmu record */
if (kgsl_mmu_is_perprocess(&device->mmu)) {
ret = adreno_allocate_global(device, &iommu->smmu_info, PAGE_SIZE, 0,
KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
"smmu_info");
if (ret)
goto done;
}
return 0;
done:
clear_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
return ret;
}
int gen8_preemption_context_init(struct kgsl_context *context)
{
struct kgsl_device *device = context->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
u64 flags = 0;
if (!adreno_preemption_feature_set(adreno_dev))
return 0;
if (context->flags & KGSL_CONTEXT_SECURE)
flags |= KGSL_MEMFLAGS_SECURE;
if (is_compat_task())
flags |= KGSL_MEMFLAGS_FORCE_32BIT;
/*
* gpumem_alloc_entry takes an extra refcount. Put it only when
* destroying the context to keep the context record valid
*/
context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
GEN8_CP_CTXRECORD_USER_RESTORE_SIZE, flags);
if (IS_ERR(context->user_ctxt_record)) {
int ret = PTR_ERR(context->user_ctxt_record);
context->user_ctxt_record = NULL;
return ret;
}
return 0;
}


@ -0,0 +1,649 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include "adreno.h"
#include "adreno_gen8.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
#include "adreno_trace.h"
#include "kgsl_trace.h"
static bool is_concurrent_binning(struct adreno_context *drawctxt)
{
if (!drawctxt)
return false;
return !(drawctxt->base.flags & KGSL_CONTEXT_SECURE);
}
static int gen8_rb_pagetable_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
struct kgsl_pagetable *pagetable, u32 *cmds)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u64 ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pagetable);
int count = 0;
u32 id = drawctxt ? drawctxt->base.id : 0;
if (pagetable == device->mmu.defaultpagetable)
return 0;
/* CP switches the pagetable and flushes the Caches */
cmds[count++] = cp_type7_packet(CP_SMMU_TABLE_UPDATE, 3);
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
cmds[count++] = id;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 5);
cmds[count++] = lower_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[count++] = upper_32_bits(SCRATCH_RB_GPU_ADDR(device,
rb->id, ttbr0));
cmds[count++] = lower_32_bits(ttbr0);
cmds[count++] = upper_32_bits(ttbr0);
cmds[count++] = id;
/*
* Sync both threads after switching pagetables and enable BR only
* to make sure BV doesn't race ahead while BR is still switching
* pagetables.
*/
cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
return count;
}
static int gen8_rb_context_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_pagetable *pagetable =
adreno_drawctxt_get_pagetable(drawctxt);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int count = 0;
u32 cmds[57];
/* Sync both threads */
cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BOTH;
/* Reset context state */
cmds[count++] = cp_type7_packet(CP_RESET_CONTEXT_STATE, 1);
cmds[count++] = CP_RESET_GLOBAL_LOCAL_TS | CP_CLEAR_BV_BR_COUNTER |
CP_CLEAR_RESOURCE_TABLE | CP_CLEAR_ON_CHIP_TS;
/*
* Enable/disable concurrent binning for pagetable switch and
* set the thread to BR since only BR can execute the pagetable
* switch packets.
*/
/* Sync both threads and enable BR only */
cmds[count++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[count++] = CP_SYNC_THREADS | CP_SET_THREAD_BR;
if (adreno_drawctxt_get_pagetable(rb->drawctxt_active) != pagetable) {
/* Clear performance counters during context switches */
if (!adreno_dev->perfcounter) {
cmds[count++] = cp_type4_packet(GEN8_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
cmds[count++] = 0x1;
cmds[count++] = cp_type4_packet(GEN8_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
cmds[count++] = 0x1;
}
count += gen8_rb_pagetable_switch(adreno_dev, rb,
drawctxt, pagetable, &cmds[count]);
/* Wait for performance counter clear to finish */
if (!adreno_dev->perfcounter) {
cmds[count++] = cp_type7_packet(CP_WAIT_REG_MEM, 6);
cmds[count++] = 0x3;
cmds[count++] = GEN8_RBBM_PERFCTR_SRAM_INIT_STATUS;
cmds[count++] = 0x0;
cmds[count++] = 0x1;
cmds[count++] = 0x1;
cmds[count++] = 0x0;
}
} else {
struct kgsl_iommu *iommu = KGSL_IOMMU(device);
u32 offset = GEN8_SMMU_BASE + (iommu->cb0_offset >> 2) + 0x0d;
/*
* Set the CONTEXTIDR register to the current context id so we
* can use it in pagefault debugging. Unlike TTBR0 we don't
* need any special sequence or locking to change it
*/
cmds[count++] = cp_type4_packet(offset, 1);
cmds[count++] = drawctxt->base.id;
}
cmds[count++] = cp_type7_packet(CP_NOP, 1);
cmds[count++] = CONTEXT_TO_MEM_IDENTIFIER;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = upper_32_bits(MEMSTORE_RB_GPU_ADDR(device, rb,
current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[count++] = lower_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = upper_32_bits(MEMSTORE_ID_GPU_ADDR(device,
KGSL_MEMSTORE_GLOBAL, current_context));
cmds[count++] = drawctxt->base.id;
cmds[count++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[count++] = 0x31;
if (adreno_is_preemption_enabled(adreno_dev)) {
u64 gpuaddr = drawctxt->base.user_ctxt_record->memdesc.gpuaddr;
cmds[count++] = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 3);
cmds[count++] = SET_PSEUDO_NON_PRIV_SAVE_ADDR;
cmds[count++] = lower_32_bits(gpuaddr);
cmds[count++] = upper_32_bits(gpuaddr);
}
return gen8_ringbuffer_addcmds(adreno_dev, rb, NULL, F_NOTPROTECTED,
cmds, count, 0, NULL);
}
#define RB_SOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp)
#define CTXT_SOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, soptimestamp)
#define RB_EOPTIMESTAMP(device, rb) \
MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp)
#define CTXT_EOPTIMESTAMP(device, drawctxt) \
MEMSTORE_ID_GPU_ADDR(device, (drawctxt)->base.id, eoptimestamp)
int gen8_ringbuffer_submit(struct adreno_ringbuffer *rb,
struct adreno_submit_time *time)
{
struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int ret = 0;
unsigned long flags;
adreno_get_submit_time(adreno_dev, rb, time);
adreno_profile_submit_time(time);
spin_lock_irqsave(&rb->preempt_lock, flags);
if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
if (adreno_dev->cur_rb == rb) {
kgsl_pwrscale_busy(device);
ret = gen8_fenced_write(adreno_dev,
GEN8_CP_RB_WPTR_GC, rb->_wptr,
FENCE_STATUS_WRITEDROPPED0_MASK);
rb->skip_inline_wptr = false;
}
} else {
if (adreno_dev->cur_rb == rb)
rb->skip_inline_wptr = true;
}
rb->wptr = rb->_wptr;
spin_unlock_irqrestore(&rb->preempt_lock, flags);
if (ret) {
/*
* If WPTR update fails, take inline snapshot and trigger
* recovery.
*/
gmu_core_fault_snapshot(device);
adreno_dispatcher_fault(adreno_dev,
ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
}
return ret;
}
int gen8_ringbuffer_init(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int i, ret;
ret = adreno_allocate_global(device, &device->scratch, PAGE_SIZE,
0, 0, KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED,
"scratch");
if (ret)
return ret;
adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
if (!adreno_preemption_feature_set(adreno_dev)) {
adreno_dev->num_ringbuffers = 1;
return adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[0], 0);
}
adreno_dev->num_ringbuffers = ARRAY_SIZE(adreno_dev->ringbuffers);
for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
int ret;
ret = adreno_ringbuffer_setup(adreno_dev,
&adreno_dev->ringbuffers[i], i);
if (ret)
return ret;
}
timer_setup(&adreno_dev->preempt.timer, adreno_preemption_timer, 0);
gen8_preemption_init(adreno_dev);
return 0;
}
#define GEN8_SUBMIT_MAX 104
int gen8_ringbuffer_addcmds(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb, struct adreno_context *drawctxt,
u32 flags, u32 *in, u32 dwords, u32 timestamp,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
u32 size = GEN8_SUBMIT_MAX + dwords;
u32 *cmds, index = 0;
u64 profile_gpuaddr;
u32 profile_dwords;
if (adreno_drawctxt_detached(drawctxt))
return -ENOENT;
if (adreno_gpu_fault(adreno_dev) != 0)
return -EPROTO;
rb->timestamp++;
if (drawctxt)
drawctxt->internal_timestamp = rb->timestamp;
/* All submissions are run with protected mode off due to APRIV */
flags &= ~F_NOTPROTECTED;
cmds = adreno_ringbuffer_allocspace(rb, size);
if (IS_ERR(cmds))
return PTR_ERR(cmds);
/* Identify the start of a command */
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = drawctxt ? CMD_IDENTIFIER : CMD_INTERNAL_IDENTIFIER;
/* This is 25 dwords when drawctxt is not NULL and the perfcounter needs to be zapped */
index += gen8_preemption_pre_ibsubmit(adreno_dev, rb, drawctxt,
&cmds[index]);
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x101; /* IFPC disable */
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
profile_gpuaddr = adreno_profile_preib_processing(adreno_dev,
drawctxt, &profile_dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (drawctxt) {
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_SOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
}
cmds[index++] = cp_type7_packet(CP_MEM_WRITE, 3);
cmds[index++] = lower_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_SOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
if (IS_SECURE(flags)) {
/* Sync BV and BR if entering secure mode */
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SYNC_THREADS | CP_CONCURRENT_BIN_DISABLE;
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 1;
}
memcpy(&cmds[index], in, dwords << 2);
index += dwords;
profile_gpuaddr = adreno_profile_postib_processing(adreno_dev,
drawctxt, &dwords);
if (profile_gpuaddr) {
cmds[index++] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(profile_gpuaddr);
cmds[index++] = upper_32_bits(profile_gpuaddr);
cmds[index++] = profile_dwords;
}
if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &device->mmu.pfpolicy))
cmds[index++] = cp_type7_packet(CP_WAIT_MEM_WRITES, 0);
if (is_concurrent_binning(drawctxt)) {
u64 addr = SCRATCH_RB_GPU_ADDR(device, rb->id, bv_ts);
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BV;
/*
* Make sure the timestamp is committed once BV pipe is
* completely done with this submission.
*/
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(27);
cmds[index++] = lower_32_bits(addr);
cmds[index++] = upper_32_bits(addr);
cmds[index++] = rb->timestamp;
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
/*
* This makes sure that BR doesn't race ahead and commit
* timestamp to memstore while BV is still processing
* this submission.
*/
cmds[index++] = cp_type7_packet(CP_WAIT_TIMESTAMP, 4);
cmds[index++] = 0;
cmds[index++] = lower_32_bits(addr);
cmds[index++] = upper_32_bits(addr);
cmds[index++] = rb->timestamp;
}
/*
* If this is an internal command, just write the ringbuffer timestamp,
* otherwise, write both
*/
if (!drawctxt) {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
} else {
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(31) | BIT(27);
cmds[index++] = lower_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = upper_32_bits(CTXT_EOPTIMESTAMP(device,
drawctxt));
cmds[index++] = timestamp;
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 4);
cmds[index++] = CACHE_CLEAN | BIT(27);
cmds[index++] = lower_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = upper_32_bits(RB_EOPTIMESTAMP(device, rb));
cmds[index++] = rb->timestamp;
}
if (IS_WFI(flags))
cmds[index++] = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
if (IS_SECURE(flags)) {
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_CONCURRENT_BIN_DISABLE;
cmds[index++] = cp_type7_packet(CP_SET_SECURE_MODE, 1);
cmds[index++] = 0;
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SYNC_THREADS;
}
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x100; /* IFPC enable */
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
/* 10 dwords */
index += gen8_preemption_post_ibsubmit(adreno_dev, &cmds[index]);
/* Adjust the wptr for the number of dwords we actually wrote */
rb->_wptr -= (size - index);
return gen8_ringbuffer_submit(rb, time);
}
static u32 gen8_get_alwayson_counter(u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = GEN8_CP_ALWAYS_ON_COUNTER_LO | (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
static u32 gen8_get_alwayson_context(u32 *cmds, u64 gpuaddr)
{
cmds[0] = cp_type7_packet(CP_REG_TO_MEM, 3);
cmds[1] = GEN8_CP_ALWAYS_ON_CONTEXT_LO | (1 << 30) | (2 << 18);
cmds[2] = lower_32_bits(gpuaddr);
cmds[3] = upper_32_bits(gpuaddr);
return 4;
}
#define PROFILE_IB_DWORDS 4
#define PROFILE_IB_SLOTS (PAGE_SIZE / (PROFILE_IB_DWORDS << 2))
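/*
 * For example, with a typical 4 KB PAGE_SIZE this gives
 * 4096 / (4 * 4) = 256 profiling IB slots per page.
 */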
static u64 gen8_get_user_profiling_ib(struct adreno_ringbuffer *rb,
struct kgsl_drawobj_cmd *cmdobj, u32 target_offset, u32 *cmds)
{
u32 offset, *ib, dwords;
if (IS_ERR(rb->profile_desc))
return 0;
offset = rb->profile_index * (PROFILE_IB_DWORDS << 2);
ib = rb->profile_desc->hostptr + offset;
dwords = gen8_get_alwayson_counter(ib,
cmdobj->profiling_buffer_gpuaddr + target_offset);
cmds[0] = cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[1] = lower_32_bits(rb->profile_desc->gpuaddr + offset);
cmds[2] = upper_32_bits(rb->profile_desc->gpuaddr + offset);
cmds[3] = dwords;
rb->profile_index = (rb->profile_index + 1) % PROFILE_IB_SLOTS;
return 4;
}
static int gen8_drawctxt_switch(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb,
struct adreno_context *drawctxt)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
int ret;
if (rb->drawctxt_active == drawctxt)
return 0;
if (kgsl_context_detached(&drawctxt->base))
return -ENOENT;
if (!_kgsl_context_get(&drawctxt->base))
return -ENOENT;
ret = gen8_rb_context_switch(adreno_dev, rb, drawctxt);
if (ret) {
kgsl_context_put(&drawctxt->base);
return ret;
}
trace_adreno_drawctxt_switch(rb, drawctxt);
/* Release the current drawctxt as soon as the new one is switched */
adreno_put_drawctxt_on_timestamp(device, rb->drawctxt_active,
rb, rb->timestamp);
rb->drawctxt_active = drawctxt;
return 0;
}
#define GEN8_USER_PROFILE_IB(rb, cmdobj, cmds, field) \
gen8_get_user_profiling_ib((rb), (cmdobj), \
offsetof(struct kgsl_drawobj_profiling_buffer, field), \
(cmds))
#define GEN8_KERNEL_PROFILE(dev, cmdobj, cmds, field) \
gen8_get_alwayson_counter((cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define GEN8_KERNEL_PROFILE_CONTEXT(dev, cmdobj, cmds, field) \
gen8_get_alwayson_context((cmds), \
(dev)->profile_buffer->gpuaddr + \
ADRENO_DRAWOBJ_PROFILE_OFFSET((cmdobj)->profile_index, \
field))
#define GEN8_COMMAND_DWORDS 60
int gen8_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
struct kgsl_drawobj_cmd *cmdobj, u32 flags,
struct adreno_submit_time *time)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
struct adreno_ringbuffer *rb = drawctxt->rb;
int ret = 0, numibs = 0, index = 0;
u32 *cmds;
/* Count the number of IBs (if we are not skipping) */
if (!IS_SKIP(flags)) {
struct list_head *tmp;
list_for_each(tmp, &cmdobj->cmdlist)
numibs++;
}
cmds = kvmalloc((GEN8_COMMAND_DWORDS + (numibs * 5)) << 2, GFP_KERNEL);
if (!cmds) {
ret = -ENOMEM;
goto done;
}
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = START_IB_IDENTIFIER;
/* Kernel profiling: 8 dwords */
if (IS_KERNEL_PROFILE(flags)) {
index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
started);
index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
ctx_start);
}
/* User profiling: 4 dwords */
if (IS_USER_PROFILE(flags))
index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
gpu_ticks_submitted);
if (is_concurrent_binning(drawctxt)) {
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BOTH;
}
if (numibs) {
struct kgsl_memobj_node *ib;
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x00d; /* IB1LIST start */
list_for_each_entry(ib, &cmdobj->cmdlist, node) {
if (ib->priv & MEMOBJ_SKIP ||
(ib->flags & KGSL_CMDLIST_CTXTSWITCH_PREAMBLE &&
!IS_PREAMBLE(flags)))
cmds[index++] = cp_type7_packet(CP_NOP, 4);
cmds[index++] =
cp_type7_packet(CP_INDIRECT_BUFFER_PFE, 3);
cmds[index++] = lower_32_bits(ib->gpuaddr);
cmds[index++] = upper_32_bits(ib->gpuaddr);
/* Double check that IB_PRIV is never set */
cmds[index++] = (ib->size >> 2) & 0xfffff;
}
cmds[index++] = cp_type7_packet(CP_SET_MARKER, 1);
cmds[index++] = 0x00e; /* IB1LIST end */
}
if (is_concurrent_binning(drawctxt)) {
cmds[index++] = cp_type7_packet(CP_THREAD_CONTROL, 1);
cmds[index++] = CP_SET_THREAD_BR;
}
/* CCU invalidate depth */
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 24;
/* CCU invalidate color */
cmds[index++] = cp_type7_packet(CP_EVENT_WRITE, 1);
cmds[index++] = 25;
/* 8 dwords */
if (IS_KERNEL_PROFILE(flags)) {
index += GEN8_KERNEL_PROFILE(adreno_dev, cmdobj, &cmds[index],
retired);
index += GEN8_KERNEL_PROFILE_CONTEXT(adreno_dev, cmdobj, &cmds[index],
ctx_end);
}
/* 4 dwords */
if (IS_USER_PROFILE(flags))
index += GEN8_USER_PROFILE_IB(rb, cmdobj, &cmds[index],
gpu_ticks_retired);
cmds[index++] = cp_type7_packet(CP_NOP, 1);
cmds[index++] = END_IB_IDENTIFIER;
ret = gen8_drawctxt_switch(adreno_dev, rb, drawctxt);
/*
* In the unlikely event of an error in the drawctxt switch,
* treat it like a hang
*/
if (ret) {
/*
* It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
* the upper layers know how to handle it
*/
if (ret != -ENOSPC && ret != -ENOENT)
dev_err(device->dev,
"Unable to switch draw context: %d\n", ret);
goto done;
}
adreno_drawobj_set_constraint(device, drawobj);
ret = gen8_ringbuffer_addcmds(adreno_dev, drawctxt->rb, drawctxt,
flags, cmds, index, drawobj->timestamp, time);
done:
trace_kgsl_issueibcmds(device, drawctxt->base.id, numibs,
drawobj->timestamp, drawobj->flags, ret, drawctxt->type);
kvfree(cmds);
return ret;
}


@ -0,0 +1,517 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/types.h>
#include <soc/qcom/cmd-db.h>
#include <soc/qcom/tcs.h>
#include "adreno.h"
#include "adreno_gen8.h"
#include "kgsl_bus.h"
#include "kgsl_device.h"
struct rpmh_arc_vals {
u32 num;
const u16 *val;
};
struct bcm {
const char *name;
u32 buswidth;
u32 channels;
u32 unit;
u16 width;
u8 vcd;
bool fixed;
};
struct bcm_data {
__le32 unit;
__le16 width;
u8 vcd;
u8 reserved;
};
struct rpmh_bw_votes {
u32 wait_bitmask;
u32 num_cmds;
u32 *addrs;
u32 num_levels;
u32 **cmds;
};
#define ARC_VOTE_SET(pri, sec, vlvl) \
(FIELD_PREP(GENMASK(31, 16), vlvl) | \
FIELD_PREP(GENMASK(15, 8), sec) | \
FIELD_PREP(GENMASK(7, 0), pri))
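/*
 * For example (illustrative values only), ARC_VOTE_SET(3, 2, 0x80) packs
 * primary rail index 3, secondary rail index 2 and VLVL 0x80 into the
 * single word 0x00800203.
 */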
static int rpmh_arc_cmds(struct rpmh_arc_vals *arc, const char *res_id)
{
size_t len = 0;
arc->val = cmd_db_read_aux_data(res_id, &len);
/*
* cmd_db_read_aux_data() gives us a zero-padded table of
* size len that contains the arc values. To determine the
* number of arc values, we loop through the table and count
* them until we get to the end of the buffer or hit the
* zero padding.
*/
for (arc->num = 1; arc->num < (len >> 1); arc->num++) {
if (arc->val[arc->num - 1] != 0 && arc->val[arc->num] == 0)
break;
}
return 0;
}
static int setup_volt_dependency_tbl(u32 *votes,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
u16 *vlvl, u32 num_entries)
{
int i, j, k;
uint16_t cur_vlvl;
bool found_match;
/* i tracks current KGSL GPU frequency table entry
* j tracks secondary rail voltage table entry
* k tracks primary rail voltage table entry
*/
for (i = 0; i < num_entries; i++) {
found_match = false;
/* Look for a primary rail voltage that matches a VLVL level */
for (k = 0; k < pri_rail->num; k++) {
if (pri_rail->val[k] >= vlvl[i]) {
cur_vlvl = pri_rail->val[k];
found_match = true;
break;
}
}
/* If we did not find a matching VLVL level then abort */
if (!found_match)
return -EINVAL;
/*
* Look for a secondary rail index whose VLVL value
* is greater than or equal to the VLVL value of the
* corresponding index of the primary rail
*/
for (j = 0; j < sec_rail->num; j++) {
if (sec_rail->val[j] >= cur_vlvl ||
j + 1 == sec_rail->num)
break;
}
if (j == sec_rail->num)
j = 0;
votes[i] = ARC_VOTE_SET(k, j, cur_vlvl);
}
return 0;
}
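/*
 * Worked example with illustrative values only: with pri_rail->val =
 * { 0, 48, 64, 128 }, sec_rail->val = { 0, 64, 128 } and vlvl[i] = 60, the
 * first primary level >= 60 is index k = 2 (VLVL 64), the first secondary
 * level >= 64 is index j = 1, so votes[i] = ARC_VOTE_SET(2, 1, 64).
 */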
/* Generate a set of bandwidth votes for the list of BCMs */
static void tcs_cmd_data(struct bcm *bcms, int count,
u32 ab, u32 ib, u32 *data, u32 perfmode_vote, bool set_perfmode)
{
int i;
for (i = 0; i < count; i++) {
bool valid = true;
bool commit = false;
u64 avg, peak, x, y;
if (i == count - 1 || bcms[i].vcd != bcms[i + 1].vcd)
commit = true;
if (bcms[i].fixed) {
if (!ab && !ib)
data[i] = BCM_TCS_CMD(commit, false, 0x0, 0x0);
else
data[i] = BCM_TCS_CMD(commit, true, 0x0,
set_perfmode ? perfmode_vote : 0x0);
continue;
}
/* Multiply the bandwidth by the width of the connection */
avg = ((u64) ab) * bcms[i].width;
/* And then divide by the total width */
do_div(avg, bcms[i].buswidth);
peak = ((u64) ib) * bcms[i].width;
do_div(peak, bcms[i].buswidth);
/* Input bandwidth value is in KBps */
x = avg * 1000ULL;
do_div(x, bcms[i].unit);
/* Input bandwidth value is in KBps */
y = peak * 1000ULL;
do_div(y, bcms[i].unit);
/*
 * If a bandwidth value was specified but the calculation ends up
 * rounding down to zero, set a minimum level
 */
if (ab && x == 0)
x = 1;
if (ib && y == 0)
y = 1;
x = min_t(u64, x, BCM_TCS_CMD_VOTE_MASK);
y = min_t(u64, y, BCM_TCS_CMD_VOTE_MASK);
if (!x && !y)
valid = false;
data[i] = BCM_TCS_CMD(commit, valid, x, y);
}
}
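/*
 * Worked example with illustrative numbers only: for a non-fixed BCM with
 * width = 4, buswidth = 16 and unit = 1000, an average bandwidth vote of
 * ab = 1000000 KBps becomes avg = 1000000 * 4 / 16 = 250000 KBps and
 * x = 250000 * 1000 / 1000 = 250000, capped at BCM_TCS_CMD_VOTE_MASK
 * before being packed into the TCS command.
 */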
static void free_rpmh_bw_votes(struct rpmh_bw_votes *votes)
{
int i;
if (!votes)
return;
for (i = 0; votes->cmds && i < votes->num_levels; i++)
kfree(votes->cmds[i]);
kfree(votes->cmds);
kfree(votes->addrs);
kfree(votes);
}
/* Build the votes table from the specified bandwidth levels */
static struct rpmh_bw_votes *build_rpmh_bw_votes(struct bcm *bcms,
int bcm_count, u32 *levels, int levels_count, u32 perfmode_vote, u32 perfmode_lvl)
{
struct rpmh_bw_votes *votes;
bool set_perfmode;
int i;
votes = kzalloc(sizeof(*votes), GFP_KERNEL);
if (!votes)
return ERR_PTR(-ENOMEM);
votes->addrs = kcalloc(bcm_count, sizeof(*votes->cmds), GFP_KERNEL);
if (!votes->addrs) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
votes->cmds = kcalloc(levels_count, sizeof(*votes->cmds), GFP_KERNEL);
if (!votes->cmds) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
votes->num_cmds = bcm_count;
votes->num_levels = levels_count;
/* Get the cmd-db information for each BCM */
for (i = 0; i < bcm_count; i++) {
size_t l;
const struct bcm_data *data;
data = cmd_db_read_aux_data(bcms[i].name, &l);
votes->addrs[i] = cmd_db_read_addr(bcms[i].name);
bcms[i].unit = le32_to_cpu(data->unit);
bcms[i].width = le16_to_cpu(data->width);
bcms[i].vcd = data->vcd;
}
for (i = 0; i < bcm_count; i++) {
if (i == (bcm_count - 1) || bcms[i].vcd != bcms[i + 1].vcd)
votes->wait_bitmask |= (1 << i);
}
for (i = 0; i < levels_count; i++) {
votes->cmds[i] = kcalloc(bcm_count, sizeof(u32), GFP_KERNEL);
if (!votes->cmds[i]) {
free_rpmh_bw_votes(votes);
return ERR_PTR(-ENOMEM);
}
set_perfmode = (i >= perfmode_lvl);
tcs_cmd_data(bcms, bcm_count, levels[i], levels[i], votes->cmds[i],
perfmode_vote, set_perfmode);
}
return votes;
}
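/*
 * Illustrative example (levels are made up): with levels[] =
 * { 0, 1555000, 2092000, 3196000 } KBps and perfmode_lvl == 2, the loop
 * above builds one TCS command set per level; only levels 2 and 3 carry
 * the ACV perfmode vote (set_perfmode == true), while the fixed ACV BCM is
 * marked invalid (no vote) for the idle level because both ab and ib are
 * zero there.
 */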
/*
 * setup_cx_arc_votes - Build the GMU CX voting table
 * @gmu: Pointer to gmu device
 * @pri_rail: Pointer to primary power rail vlvl table
 * @sec_rail: Pointer to secondary/dependent power rail vlvl table
 *
 * This function initializes the cx votes for all gmu frequencies
 * for gmu dcvs
 */
static int setup_cx_arc_votes(struct gen8_gmu_device *gmu,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail)
{
/* Hardcoded values of GMU CX voltage levels */
u16 gmu_cx_vlvl[MAX_CX_LEVELS];
u32 cx_votes[MAX_CX_LEVELS];
struct gen8_dcvs_table *table = &gmu->dcvs_table;
u32 *freqs = gmu->freqs;
u32 *vlvls = gmu->vlvls;
int ret, i;
gmu_cx_vlvl[0] = 0;
gmu_cx_vlvl[1] = vlvls[0];
gmu_cx_vlvl[2] = vlvls[1];
table->gmu_level_num = 3;
table->cx_votes[0].freq = 0;
table->cx_votes[1].freq = freqs[0] / 1000;
table->cx_votes[2].freq = freqs[1] / 1000;
ret = setup_volt_dependency_tbl(cx_votes, pri_rail,
sec_rail, gmu_cx_vlvl, table->gmu_level_num);
if (!ret) {
for (i = 0; i < table->gmu_level_num; i++)
table->cx_votes[i].vote = cx_votes[i];
}
return ret;
}
static int to_cx_hlvl(struct rpmh_arc_vals *cx_rail, u32 vlvl, u32 *hlvl)
{
u32 i;
/*
* This means that the Gx level doesn't have a dependency on Cx level.
* Return the same value to disable cx voting at GMU.
*/
if (vlvl == 0xffffffff) {
*hlvl = vlvl;
return 0;
}
for (i = 0; i < cx_rail->num; i++) {
if (cx_rail->val[i] >= vlvl) {
*hlvl = i;
return 0;
}
}
return -EINVAL;
}
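/*
 * Illustrative example (table values are made up): with a CX rail table of
 * { 0, 48, 64, 128 }, a requested vlvl of 64 maps to hlvl == 2 (the first
 * index whose value is >= 64), while the sentinel vlvl of 0xffffffff is
 * passed straight through so the GMU skips the CX vote for that level.
 */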
/*
 * setup_gx_arc_votes - Build the gpu dcvs voting table
 * @adreno_dev: Pointer to the adreno device
 * @pri_rail: Pointer to primary power rail vlvl table
 * @sec_rail: Pointer to secondary/dependent power rail vlvl table
 * @cx_rail: Pointer to the cx power rail vlvl table
 *
 * This function initializes the gx votes for all gpu frequencies
 * for gpu dcvs
 */
static int setup_gx_arc_votes(struct adreno_device *adreno_dev,
struct rpmh_arc_vals *pri_rail, struct rpmh_arc_vals *sec_rail,
struct rpmh_arc_vals *cx_rail)
{
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct gen8_dcvs_table *table = &gmu->dcvs_table;
u32 index;
u16 vlvl_tbl[MAX_GX_LEVELS];
u32 gx_votes[MAX_GX_LEVELS];
int ret, i;
if (pwr->num_pwrlevels + 1 > ARRAY_SIZE(vlvl_tbl)) {
dev_err(device->dev,
"Defined more GPU DCVS levels than RPMh can support\n");
return -ERANGE;
}
/* Add the zero powerlevel for the perf table */
table->gpu_level_num = pwr->num_pwrlevels + 1;
memset(vlvl_tbl, 0, sizeof(vlvl_tbl));
table->gx_votes[0].freq = 0;
table->gx_votes[0].cx_vote = 0;
/* Disable cx vote in gmu dcvs table if it is not supported in DT */
if (pwr->pwrlevels[0].cx_level == 0xffffffff)
table->gx_votes[0].cx_vote = 0xffffffff;
/* GMU power levels are in ascending order */
for (index = 1, i = pwr->num_pwrlevels - 1; i >= 0; i--, index++) {
u32 cx_vlvl = pwr->pwrlevels[i].cx_level;
vlvl_tbl[index] = pwr->pwrlevels[i].voltage_level;
table->gx_votes[index].freq = pwr->pwrlevels[i].gpu_freq / 1000;
ret = to_cx_hlvl(cx_rail, cx_vlvl,
&table->gx_votes[index].cx_vote);
if (ret) {
dev_err(device->dev, "Unsupported cx corner: %u\n",
cx_vlvl);
return ret;
}
}
ret = setup_volt_dependency_tbl(gx_votes, pri_rail,
sec_rail, vlvl_tbl, table->gpu_level_num);
if (!ret) {
for (i = 0; i < table->gpu_level_num; i++)
table->gx_votes[i].vote = gx_votes[i];
}
return ret;
}
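/*
 * Illustrative example: for a GPU frequency table of 900 MHz, 600 MHz and
 * 300 MHz (pwrlevels[] is ordered fastest first), the resulting GMU table
 * has gpu_level_num == 4 with gx_votes[].freq of { 0, 300000, 600000,
 * 900000 } kHz, i.e. index 0 is the added zero powerlevel and the
 * remaining entries are the DT powerlevels reversed into ascending order.
 */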
static int build_dcvs_table(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
struct rpmh_arc_vals gx_arc, cx_arc, mx_arc;
int ret;
ret = rpmh_arc_cmds(&gx_arc, "gfx.lvl");
if (ret)
return ret;
ret = rpmh_arc_cmds(&cx_arc, "cx.lvl");
if (ret)
return ret;
ret = rpmh_arc_cmds(&mx_arc, "mx.lvl");
if (ret)
return ret;
ret = setup_cx_arc_votes(gmu, &cx_arc, &mx_arc);
if (ret)
return ret;
return setup_gx_arc_votes(adreno_dev, &gx_arc, &mx_arc, &cx_arc);
}
/*
* List of Bus Control Modules (BCMs) that need to be configured for the GPU
* to access DDR. For each bus level we will generate a vote for each BCM
*/
static struct bcm gen8_ddr_bcms[] = {
{ .name = "SH0", .buswidth = 16 },
{ .name = "MC0", .buswidth = 4 },
{ .name = "ACV", .fixed = true },
};
/* Same as above, but for the CNOC BCMs */
static struct bcm gen8_cnoc_bcms[] = {
{ .name = "CN0", .buswidth = 4 },
};
static void build_bw_table_cmd(struct hfi_bwtable_cmd *cmd,
struct rpmh_bw_votes *ddr, struct rpmh_bw_votes *cnoc)
{
u32 i, j;
cmd->bw_level_num = ddr->num_levels;
cmd->ddr_cmds_num = ddr->num_cmds;
cmd->ddr_wait_bitmask = ddr->wait_bitmask;
for (i = 0; i < ddr->num_cmds; i++)
cmd->ddr_cmd_addrs[i] = ddr->addrs[i];
for (i = 0; i < ddr->num_levels; i++)
for (j = 0; j < ddr->num_cmds; j++)
cmd->ddr_cmd_data[i][j] = (u32) ddr->cmds[i][j];
if (!cnoc)
return;
cmd->cnoc_cmds_num = cnoc->num_cmds;
cmd->cnoc_wait_bitmask = cnoc->wait_bitmask;
for (i = 0; i < cnoc->num_cmds; i++)
cmd->cnoc_cmd_addrs[i] = cnoc->addrs[i];
for (i = 0; i < cnoc->num_levels; i++)
for (j = 0; j < cnoc->num_cmds; j++)
cmd->cnoc_cmd_data[i][j] = (u32) cnoc->cmds[i][j];
}
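/*
 * Resulting HFI layout (illustrative): ddr_cmd_addrs[] holds one RPMh
 * address per BCM, and ddr_cmd_data[level][cmd] holds the pre-computed TCS
 * vote for that BCM at that bandwidth level, so the GMU can switch bus
 * levels using the table alone, without further calculation on the host.
 */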
/* BIT(2) is used to vote for GPU performance mode through GMU */
#define ACV_GPU_PERFMODE_VOTE BIT(2)
static int build_bw_table(struct adreno_device *adreno_dev)
{
struct gen8_gmu_device *gmu = to_gen8_gmu(adreno_dev);
const struct adreno_gen8_core *gen8_core = to_gen8_core(adreno_dev);
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct rpmh_bw_votes *ddr, *cnoc = NULL;
u32 perfmode_lvl = kgsl_pwrctrl_get_acv_perfmode_lvl(device,
gen8_core->acv_perfmode_ddr_freq);
u32 *cnoc_table;
u32 count;
int ret;
ddr = build_rpmh_bw_votes(gen8_ddr_bcms, ARRAY_SIZE(gen8_ddr_bcms),
pwr->ddr_table, pwr->ddr_table_count, ACV_GPU_PERFMODE_VOTE, perfmode_lvl);
if (IS_ERR(ddr))
return PTR_ERR(ddr);
cnoc_table = kgsl_bus_get_table(device->pdev, "qcom,bus-table-cnoc",
&count);
if (count > 0)
cnoc = build_rpmh_bw_votes(gen8_cnoc_bcms,
ARRAY_SIZE(gen8_cnoc_bcms), cnoc_table, count, 0, 0);
kfree(cnoc_table);
if (IS_ERR(cnoc)) {
free_rpmh_bw_votes(ddr);
return PTR_ERR(cnoc);
}
ret = CMD_MSG_HDR(gmu->hfi.bw_table, H2F_MSG_BW_VOTE_TBL);
if (ret)
return ret;
build_bw_table_cmd(&gmu->hfi.bw_table, ddr, cnoc);
free_rpmh_bw_votes(ddr);
free_rpmh_bw_votes(cnoc);
return 0;
}
int gen8_build_rpmh_tables(struct adreno_device *adreno_dev)
{
int ret;
ret = build_dcvs_table(adreno_dev);
if (ret) {
dev_err(adreno_dev->dev.dev, "Failed to build dcvs table\n");
return ret;
}
ret = build_bw_table(adreno_dev);
if (ret)
dev_err(adreno_dev->dev.dev, "Failed to build bw table\n");
return ret;
}

File diff suppressed because it is too large

View File

@ -0,0 +1,656 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef __ADRENO_GEN8_SNAPSHOT_H
#define __ADRENO_GEN8_SNAPSHOT_H
#include "adreno.h"
#include "adreno_gen8.h"
#include "kgsl_regmap.h"
#include "kgsl_snapshot.h"
enum cluster_id {
CLUSTER_NONE = 0,
CLUSTER_FE_US = 1,
CLUSTER_FE_S = 2,
CLUSTER_SP_VS = 3,
CLUSTER_VPC_VS = 4,
CLUSTER_VPC_US = 5,
CLUSTER_GRAS = 6,
CLUSTER_SP_PS = 7,
CLUSTER_VPC_PS = 8,
CLUSTER_PS = 9,
};
enum location_id {
HLSQ_STATE = 0,
HLSQ_DP = 1,
SP_TOP = 2,
USPTP = 3,
HLSQ_DP_STR = 4,
};
#define STATE_NON_CONTEXT 0
#define STATE_TOGGLE_CTXT 1
#define STATE_FORCE_CTXT_0 2
#define STATE_FORCE_CTXT_1 3
#define UNSLICE 0
#define SLICE 1
#define MAX_PHYSICAL_SLICES 1
#define NUMBER_OF_SLICES(region) ((region == SLICE) ? MAX_PHYSICAL_SLICES : 1)
#define SLICE_ID(region, j) ((region == SLICE) ? j : UINT_MAX)
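/*
 * Usage note (illustrative): for an unsliced region NUMBER_OF_SLICES()
 * evaluates to 1 and SLICE_ID() evaluates to UINT_MAX, so dump loops run
 * once with an "invalid" slice id; for a sliced region they loop over
 * MAX_PHYSICAL_SLICES real slice ids starting at 0.
 */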
#define GEN8_DEBUGBUS_BLOCK_SIZE 0x100
/* Number of dword to dump in snapshot for CP SQE */
#define GEN8_SQE_FW_SNAPSHOT_DWORDS 5
struct sel_reg {
u32 host_reg;
u32 cd_reg;
u32 val;
};
struct gen8_shader_block_info {
struct gen8_shader_block *block;
u32 sp_id;
u32 usptp;
u32 slice_id;
u32 location_id;
u32 context_id;
u32 bank;
u64 offset;
};
struct gen8_shader_block {
/* statetype: Type identifier for the block */
u32 statetype;
/* size: Size of the block (in dwords) */
u32 size;
/* num_sps: The number of SPs to dump */
u32 num_sps;
/* num_usptps: The number of USPTPs to dump */
u32 num_usptps;
/* pipeid: Pipe identifier for the block data */
u32 pipeid;
/* location: Location identifier for the block data */
u32 location;
/* num_slices: the number of slices to dump */
u32 num_slices;
/* num_ctx: repeat id to loop */
u32 num_ctx;
/* offset: The offset in the snapshot dump */
u64 offset;
};
struct gen8_cluster_registers_info {
struct gen8_cluster_registers *cluster;
u32 cluster_id;
u32 slice_id;
u32 pipe_id;
u32 context_id;
u64 offset;
};
struct gen8_cluster_registers {
/* cluster_id: Cluster identifier */
u32 cluster_id;
/* slice_region: is it slice or unslice */
u32 slice_region;
/* pipe_id: Pipe Identifier */
u32 pipe_id;
/* context_id: one of STATE_ that identifies the context to dump */
u32 context_id;
/* regs: Pointer to an array of register pairs */
const u32 *regs;
/* sel: Pointer to a selector register to write before reading */
const struct sel_reg *sel;
/* offset: Internal variable to track the state of the crashdump */
u32 offset;
};
struct gen8_reg_list_info {
struct gen8_reg_list *regs;
u32 cluster_id;
u32 slice_id;
u32 pipe_id;
u32 sp_id;
u32 usptp_id;
u32 context_id;
u64 offset;
};
struct gen8_sptp_cluster_registers_info {
struct gen8_sptp_cluster_registers *cluster;
u32 cluster_id;
u32 slice_id;
u32 pipe_id;
u32 sp_id;
u32 usptp_id;
u32 location_id;
u32 context_id;
u32 statetype_id;
u64 offset;
};
struct gen8_sptp_cluster_registers {
/* cluster_id: Cluster identifier */
u32 cluster_id;
/* slice_region: is it slice or unslice */
u32 slice_region;
/* num_sps: The number of SPs to dump */
u32 num_sps;
/* num_usptps: The number of USPTPs to dump */
u32 num_usptps;
/* statetype: SP block state type for the cluster */
u32 statetype;
/* pipe_id: Pipe identifier */
u32 pipe_id;
/* context_id: Context identifier */
u32 context_id;
/* location_id: Location identifier */
u32 location_id;
/* regs: Pointer to the list of register pairs to read */
const u32 *regs;
/* regbase: Dword offset of the register block in the GPU register space */
u32 regbase;
/* offset: Internal variable used to track the crashdump state */
u32 offset;
};
struct gen8_cp_indexed_reg {
u32 addr;
u32 data;
u32 slice_region;
u32 pipe_id;
u32 size;
};
struct gen8_reg_list {
u32 slice_region;
const u32 *regs;
const struct sel_reg *sel;
u64 offset;
};
struct gen8_trace_buffer_info {
u16 dbgc_ctrl;
u16 segment;
u16 granularity;
u16 ping_blk[TRACE_BUF_NUM_SIG];
u16 ping_idx[TRACE_BUF_NUM_SIG];
};
enum gen8_debugbus_ids {
DEBUGBUS_GBIF_CX_GC_US_I_0 = 1,
DEBUGBUS_GMU_CX_GC_US_I_0 = 2,
DEBUGBUS_CX_GC_US_I_0 = 3,
DEBUGBUS_GBIF_GX_GC_US_I_0 = 8,
DEBUGBUS_GMU_GX_GC_US_I_0 = 9,
DEBUGBUS_DBGC_GC_US_I_0 = 10,
DEBUGBUS_RBBM_GC_US_I_0 = 11,
DEBUGBUS_LARC_GC_US_I_0 = 12,
DEBUGBUS_COM_GC_US_I_0 = 13,
DEBUGBUS_HLSQ_GC_US_I_0 = 14,
DEBUGBUS_CGC_GC_US_I_0 = 15,
DEBUGBUS_VSC_GC_US_I_0_0 = 20,
DEBUGBUS_VSC_GC_US_I_0_1 = 21,
DEBUGBUS_UFC_GC_US_I_0 = 24,
DEBUGBUS_UFC_GC_US_I_1 = 25,
DEBUGBUS_CP_GC_US_I_0_0 = 40,
DEBUGBUS_CP_GC_US_I_0_1 = 41,
DEBUGBUS_CP_GC_US_I_0_2 = 42,
DEBUGBUS_PC_BR_US_I_0 = 56,
DEBUGBUS_PC_BV_US_I_0 = 57,
DEBUGBUS_GPC_BR_US_I_0 = 58,
DEBUGBUS_GPC_BV_US_I_0 = 59,
DEBUGBUS_VPC_BR_US_I_0 = 60,
DEBUGBUS_VPC_BV_US_I_0 = 61,
DEBUGBUS_UCHE_WRAPPER_GC_US_I_0 = 80,
DEBUGBUS_UCHE_GC_US_I_0 = 81,
DEBUGBUS_UCHE_GC_US_I_1 = 82,
DEBUGBUS_CP_GC_S_0_I_0 = 128,
DEBUGBUS_PC_BR_S_0_I_0 = 129,
DEBUGBUS_PC_BV_S_0_I_0 = 130,
DEBUGBUS_TESS_GC_S_0_I_0 = 131,
DEBUGBUS_TSEFE_GC_S_0_I_0 = 132,
DEBUGBUS_TSEBE_GC_S_0_I_0 = 133,
DEBUGBUS_RAS_GC_S_0_I_0 = 134,
DEBUGBUS_LRZ_BR_S_0_I_0 = 135,
DEBUGBUS_LRZ_BV_S_0_I_0 = 136,
DEBUGBUS_VFDP_GC_S_0_I_0 = 137,
DEBUGBUS_GPC_BR_S_0_I_0 = 138,
DEBUGBUS_GPC_BV_S_0_I_0 = 139,
DEBUGBUS_VPCFE_BR_S_0_I_0 = 140,
DEBUGBUS_VPCFE_BV_S_0_I_0 = 141,
DEBUGBUS_VPCBE_BR_S_0_I_0 = 142,
DEBUGBUS_VPCBE_BV_S_0_I_0 = 143,
DEBUGBUS_CCHE_GC_S_0_I_0 = 144,
DEBUGBUS_DBGC_GC_S_0_I_0 = 145,
DEBUGBUS_LARC_GC_S_0_I_0 = 146,
DEBUGBUS_RBBM_GC_S_0_I_0 = 147,
DEBUGBUS_CCRE_GC_S_0_I_0 = 148,
DEBUGBUS_CGC_GC_S_0_I_0 = 149,
DEBUGBUS_GMU_GC_S_0_I_0 = 150,
DEBUGBUS_SLICE_GC_S_0_I_0 = 151,
DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0 = 152,
DEBUGBUS_USP_GC_S_0_I_0 = 160,
DEBUGBUS_USP_GC_S_0_I_1 = 161,
DEBUGBUS_USPTP_GC_S_0_I_0 = 166,
DEBUGBUS_USPTP_GC_S_0_I_1 = 167,
DEBUGBUS_USPTP_GC_S_0_I_2 = 168,
DEBUGBUS_USPTP_GC_S_0_I_3 = 169,
DEBUGBUS_TP_GC_S_0_I_0 = 178,
DEBUGBUS_TP_GC_S_0_I_1 = 179,
DEBUGBUS_TP_GC_S_0_I_2 = 180,
DEBUGBUS_TP_GC_S_0_I_3 = 181,
DEBUGBUS_RB_GC_S_0_I_0 = 190,
DEBUGBUS_RB_GC_S_0_I_1 = 191,
DEBUGBUS_CCU_GC_S_0_I_0 = 196,
DEBUGBUS_CCU_GC_S_0_I_1 = 197,
DEBUGBUS_HLSQ_GC_S_0_I_0 = 202,
DEBUGBUS_HLSQ_GC_S_0_I_1 = 203,
DEBUGBUS_VFD_GC_S_0_I_0 = 208,
DEBUGBUS_VFD_GC_S_0_I_1 = 209,
DEBUGBUS_CP_GC_S_1_I_0 = 256,
DEBUGBUS_PC_BR_S_1_I_0 = 257,
DEBUGBUS_PC_BV_S_1_I_0 = 258,
DEBUGBUS_TESS_GC_S_1_I_0 = 259,
DEBUGBUS_TSEFE_GC_S_1_I_0 = 260,
DEBUGBUS_TSEBE_GC_S_1_I_0 = 261,
DEBUGBUS_RAS_GC_S_1_I_0 = 262,
DEBUGBUS_LRZ_BR_S_1_I_0 = 263,
DEBUGBUS_LRZ_BV_S_1_I_0 = 264,
DEBUGBUS_VFDP_GC_S_1_I_0 = 265,
DEBUGBUS_GPC_BR_S_1_I_0 = 266,
DEBUGBUS_GPC_BV_S_1_I_0 = 267,
DEBUGBUS_VPCFE_BR_S_1_I_0 = 268,
DEBUGBUS_VPCFE_BV_S_1_I_0 = 269,
DEBUGBUS_VPCBE_BR_S_1_I_0 = 270,
DEBUGBUS_VPCBE_BV_S_1_I_0 = 271,
DEBUGBUS_CCHE_GC_S_1_I_0 = 272,
DEBUGBUS_DBGC_GC_S_1_I_0 = 273,
DEBUGBUS_LARC_GC_S_1_I_0 = 274,
DEBUGBUS_RBBM_GC_S_1_I_0 = 275,
DEBUGBUS_CCRE_GC_S_1_I_0 = 276,
DEBUGBUS_CGC_GC_S_1_I_0 = 277,
DEBUGBUS_GMU_GC_S_1_I_0 = 278,
DEBUGBUS_SLICE_GC_S_1_I_0 = 279,
DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0 = 280,
DEBUGBUS_USP_GC_S_1_I_0 = 288,
DEBUGBUS_USP_GC_S_1_I_1 = 289,
DEBUGBUS_USPTP_GC_S_1_I_0 = 294,
DEBUGBUS_USPTP_GC_S_1_I_1 = 295,
DEBUGBUS_USPTP_GC_S_1_I_2 = 296,
DEBUGBUS_USPTP_GC_S_1_I_3 = 297,
DEBUGBUS_TP_GC_S_1_I_0 = 306,
DEBUGBUS_TP_GC_S_1_I_1 = 307,
DEBUGBUS_TP_GC_S_1_I_2 = 308,
DEBUGBUS_TP_GC_S_1_I_3 = 309,
DEBUGBUS_RB_GC_S_1_I_0 = 318,
DEBUGBUS_RB_GC_S_1_I_1 = 319,
DEBUGBUS_CCU_GC_S_1_I_0 = 324,
DEBUGBUS_CCU_GC_S_1_I_1 = 325,
DEBUGBUS_HLSQ_GC_S_1_I_0 = 330,
DEBUGBUS_HLSQ_GC_S_1_I_1 = 331,
DEBUGBUS_VFD_GC_S_1_I_0 = 336,
DEBUGBUS_VFD_GC_S_1_I_1 = 337,
DEBUGBUS_CP_GC_S_2_I_0 = 384,
DEBUGBUS_PC_BR_S_2_I_0 = 385,
DEBUGBUS_PC_BV_S_2_I_0 = 386,
DEBUGBUS_TESS_GC_S_2_I_0 = 387,
DEBUGBUS_TSEFE_GC_S_2_I_0 = 388,
DEBUGBUS_TSEBE_GC_S_2_I_0 = 389,
DEBUGBUS_RAS_GC_S_2_I_0 = 390,
DEBUGBUS_LRZ_BR_S_2_I_0 = 391,
DEBUGBUS_LRZ_BV_S_2_I_0 = 392,
DEBUGBUS_VFDP_GC_S_2_I_0 = 393,
DEBUGBUS_GPC_BR_S_2_I_0 = 394,
DEBUGBUS_GPC_BV_S_2_I_0 = 395,
DEBUGBUS_VPCFE_BR_S_2_I_0 = 396,
DEBUGBUS_VPCFE_BV_S_2_I_0 = 397,
DEBUGBUS_VPCBE_BR_S_2_I_0 = 398,
DEBUGBUS_VPCBE_BV_S_2_I_0 = 399,
DEBUGBUS_CCHE_GC_S_2_I_0 = 400,
DEBUGBUS_DBGC_GC_S_2_I_0 = 401,
DEBUGBUS_LARC_GC_S_2_I_0 = 402,
DEBUGBUS_RBBM_GC_S_2_I_0 = 403,
DEBUGBUS_CCRE_GC_S_2_I_0 = 404,
DEBUGBUS_CGC_GC_S_2_I_0 = 405,
DEBUGBUS_GMU_GC_S_2_I_0 = 406,
DEBUGBUS_SLICE_GC_S_2_I_0 = 407,
DEBUGBUS_HLSQ_SPTP_STAR_GC_S_2_I_0 = 408,
DEBUGBUS_USP_GC_S_2_I_0 = 416,
DEBUGBUS_USP_GC_S_2_I_1 = 417,
DEBUGBUS_USPTP_GC_S_2_I_0 = 422,
DEBUGBUS_USPTP_GC_S_2_I_1 = 423,
DEBUGBUS_USPTP_GC_S_2_I_2 = 424,
DEBUGBUS_USPTP_GC_S_2_I_3 = 425,
DEBUGBUS_TP_GC_S_2_I_0 = 434,
DEBUGBUS_TP_GC_S_2_I_1 = 435,
DEBUGBUS_TP_GC_S_2_I_2 = 436,
DEBUGBUS_TP_GC_S_2_I_3 = 437,
DEBUGBUS_RB_GC_S_2_I_0 = 446,
DEBUGBUS_RB_GC_S_2_I_1 = 447,
DEBUGBUS_CCU_GC_S_2_I_0 = 452,
DEBUGBUS_CCU_GC_S_2_I_1 = 453,
DEBUGBUS_HLSQ_GC_S_2_I_0 = 458,
DEBUGBUS_HLSQ_GC_S_2_I_1 = 459,
DEBUGBUS_VFD_GC_S_2_I_0 = 464,
DEBUGBUS_VFD_GC_S_2_I_1 = 465,
};
static const u32 gen8_debugbus_blocks[] = {
DEBUGBUS_GMU_GX_GC_US_I_0,
DEBUGBUS_DBGC_GC_US_I_0,
DEBUGBUS_RBBM_GC_US_I_0,
DEBUGBUS_LARC_GC_US_I_0,
DEBUGBUS_COM_GC_US_I_0,
DEBUGBUS_HLSQ_GC_US_I_0,
DEBUGBUS_CGC_GC_US_I_0,
DEBUGBUS_VSC_GC_US_I_0_0,
DEBUGBUS_VSC_GC_US_I_0_1,
DEBUGBUS_UFC_GC_US_I_0,
DEBUGBUS_UFC_GC_US_I_1,
DEBUGBUS_CP_GC_US_I_0_0,
DEBUGBUS_CP_GC_US_I_0_1,
DEBUGBUS_CP_GC_US_I_0_2,
DEBUGBUS_PC_BR_US_I_0,
DEBUGBUS_PC_BV_US_I_0,
DEBUGBUS_GPC_BR_US_I_0,
DEBUGBUS_GPC_BV_US_I_0,
DEBUGBUS_VPC_BR_US_I_0,
DEBUGBUS_VPC_BV_US_I_0,
DEBUGBUS_UCHE_WRAPPER_GC_US_I_0,
DEBUGBUS_UCHE_GC_US_I_0,
DEBUGBUS_UCHE_GC_US_I_1,
DEBUGBUS_CP_GC_S_0_I_0,
DEBUGBUS_PC_BR_S_0_I_0,
DEBUGBUS_PC_BV_S_0_I_0,
DEBUGBUS_TESS_GC_S_0_I_0,
DEBUGBUS_TSEFE_GC_S_0_I_0,
DEBUGBUS_TSEBE_GC_S_0_I_0,
DEBUGBUS_RAS_GC_S_0_I_0,
DEBUGBUS_LRZ_BR_S_0_I_0,
DEBUGBUS_LRZ_BV_S_0_I_0,
DEBUGBUS_VFDP_GC_S_0_I_0,
DEBUGBUS_GPC_BR_S_0_I_0,
DEBUGBUS_GPC_BV_S_0_I_0,
DEBUGBUS_VPCFE_BR_S_0_I_0,
DEBUGBUS_VPCFE_BV_S_0_I_0,
DEBUGBUS_VPCBE_BR_S_0_I_0,
DEBUGBUS_VPCBE_BV_S_0_I_0,
DEBUGBUS_CCHE_GC_S_0_I_0,
DEBUGBUS_DBGC_GC_S_0_I_0,
DEBUGBUS_LARC_GC_S_0_I_0,
DEBUGBUS_RBBM_GC_S_0_I_0,
DEBUGBUS_CCRE_GC_S_0_I_0,
DEBUGBUS_CGC_GC_S_0_I_0,
DEBUGBUS_GMU_GC_S_0_I_0,
DEBUGBUS_SLICE_GC_S_0_I_0,
DEBUGBUS_HLSQ_SPTP_STAR_GC_S_0_I_0,
DEBUGBUS_USP_GC_S_0_I_0,
DEBUGBUS_USP_GC_S_0_I_1,
DEBUGBUS_USPTP_GC_S_0_I_0,
DEBUGBUS_USPTP_GC_S_0_I_1,
DEBUGBUS_USPTP_GC_S_0_I_2,
DEBUGBUS_USPTP_GC_S_0_I_3,
DEBUGBUS_TP_GC_S_0_I_0,
DEBUGBUS_TP_GC_S_0_I_1,
DEBUGBUS_TP_GC_S_0_I_2,
DEBUGBUS_TP_GC_S_0_I_3,
DEBUGBUS_RB_GC_S_0_I_0,
DEBUGBUS_RB_GC_S_0_I_1,
DEBUGBUS_CCU_GC_S_0_I_0,
DEBUGBUS_CCU_GC_S_0_I_1,
DEBUGBUS_HLSQ_GC_S_0_I_0,
DEBUGBUS_HLSQ_GC_S_0_I_1,
DEBUGBUS_VFD_GC_S_0_I_0,
DEBUGBUS_VFD_GC_S_0_I_1,
DEBUGBUS_CP_GC_S_1_I_0,
DEBUGBUS_PC_BR_S_1_I_0,
DEBUGBUS_PC_BV_S_1_I_0,
DEBUGBUS_TESS_GC_S_1_I_0,
DEBUGBUS_TSEFE_GC_S_1_I_0,
DEBUGBUS_TSEBE_GC_S_1_I_0,
DEBUGBUS_RAS_GC_S_1_I_0,
DEBUGBUS_LRZ_BR_S_1_I_0,
DEBUGBUS_LRZ_BV_S_1_I_0,
DEBUGBUS_VFDP_GC_S_1_I_0,
DEBUGBUS_GPC_BR_S_1_I_0,
DEBUGBUS_GPC_BV_S_1_I_0,
DEBUGBUS_VPCFE_BR_S_1_I_0,
DEBUGBUS_VPCFE_BV_S_1_I_0,
DEBUGBUS_VPCBE_BR_S_1_I_0,
DEBUGBUS_VPCBE_BV_S_1_I_0,
DEBUGBUS_CCHE_GC_S_1_I_0,
DEBUGBUS_DBGC_GC_S_1_I_0,
DEBUGBUS_LARC_GC_S_1_I_0,
DEBUGBUS_RBBM_GC_S_1_I_0,
DEBUGBUS_CCRE_GC_S_1_I_0,
DEBUGBUS_CGC_GC_S_1_I_0,
DEBUGBUS_GMU_GC_S_1_I_0,
DEBUGBUS_SLICE_GC_S_1_I_0,
DEBUGBUS_HLSQ_SPTP_STAR_GC_S_1_I_0,
DEBUGBUS_USP_GC_S_1_I_0,
DEBUGBUS_USP_GC_S_1_I_1,
DEBUGBUS_USPTP_GC_S_1_I_0,
DEBUGBUS_USPTP_GC_S_1_I_1,
DEBUGBUS_USPTP_GC_S_1_I_2,
DEBUGBUS_USPTP_GC_S_1_I_3,
DEBUGBUS_TP_GC_S_1_I_0,
DEBUGBUS_TP_GC_S_1_I_1,
DEBUGBUS_TP_GC_S_1_I_2,
DEBUGBUS_TP_GC_S_1_I_3,
DEBUGBUS_RB_GC_S_1_I_0,
DEBUGBUS_RB_GC_S_1_I_1,
DEBUGBUS_CCU_GC_S_1_I_0,
DEBUGBUS_CCU_GC_S_1_I_1,
DEBUGBUS_HLSQ_GC_S_1_I_0,
DEBUGBUS_HLSQ_GC_S_1_I_1,
DEBUGBUS_VFD_GC_S_1_I_0,
DEBUGBUS_VFD_GC_S_1_I_1,
DEBUGBUS_CP_GC_S_2_I_0,
DEBUGBUS_PC_BR_S_2_I_0,
DEBUGBUS_PC_BV_S_2_I_0,
DEBUGBUS_TESS_GC_S_2_I_0,
DEBUGBUS_TSEFE_GC_S_2_I_0,
DEBUGBUS_TSEBE_GC_S_2_I_0,
DEBUGBUS_RAS_GC_S_2_I_0,
DEBUGBUS_LRZ_BR_S_2_I_0,
DEBUGBUS_LRZ_BV_S_2_I_0,
DEBUGBUS_VFDP_GC_S_2_I_0,
DEBUGBUS_GPC_BR_S_2_I_0,
DEBUGBUS_GPC_BV_S_2_I_0,
DEBUGBUS_VPCFE_BR_S_2_I_0,
DEBUGBUS_VPCFE_BV_S_2_I_0,
DEBUGBUS_VPCBE_BR_S_2_I_0,
DEBUGBUS_VPCBE_BV_S_2_I_0,
DEBUGBUS_CCHE_GC_S_2_I_0,
DEBUGBUS_DBGC_GC_S_2_I_0,
DEBUGBUS_LARC_GC_S_2_I_0,
DEBUGBUS_RBBM_GC_S_2_I_0,
DEBUGBUS_CCRE_GC_S_2_I_0,
DEBUGBUS_CGC_GC_S_2_I_0,
DEBUGBUS_GMU_GC_S_2_I_0,
DEBUGBUS_SLICE_GC_S_2_I_0,
DEBUGBUS_HLSQ_SPTP_STAR_GC_S_2_I_0,
DEBUGBUS_USP_GC_S_2_I_0,
DEBUGBUS_USP_GC_S_2_I_1,
DEBUGBUS_USPTP_GC_S_2_I_0,
DEBUGBUS_USPTP_GC_S_2_I_1,
DEBUGBUS_USPTP_GC_S_2_I_2,
DEBUGBUS_USPTP_GC_S_2_I_3,
DEBUGBUS_TP_GC_S_2_I_0,
DEBUGBUS_TP_GC_S_2_I_1,
DEBUGBUS_TP_GC_S_2_I_2,
DEBUGBUS_TP_GC_S_2_I_3,
DEBUGBUS_RB_GC_S_2_I_0,
DEBUGBUS_RB_GC_S_2_I_1,
DEBUGBUS_CCU_GC_S_2_I_0,
DEBUGBUS_CCU_GC_S_2_I_1,
DEBUGBUS_HLSQ_GC_S_2_I_0,
DEBUGBUS_HLSQ_GC_S_2_I_1,
DEBUGBUS_VFD_GC_S_2_I_0,
DEBUGBUS_VFD_GC_S_2_I_1,
};
static const u32 gen8_gbif_debugbus_blocks[] = {
DEBUGBUS_GBIF_GX_GC_US_I_0,
};
static const u32 gen8_cx_debugbus_blocks[] = {
DEBUGBUS_GBIF_CX_GC_US_I_0,
DEBUGBUS_GMU_CX_GC_US_I_0,
DEBUGBUS_CX_GC_US_I_0,
};
enum gen8_statetype_ids {
TP0_NCTX_REG = 0,
TP0_CTX0_3D_CVS_REG = 1,
TP0_CTX0_3D_CPS_REG = 2,
TP0_CTX1_3D_CVS_REG = 3,
TP0_CTX1_3D_CPS_REG = 4,
TP0_CTX2_3D_CPS_REG = 5,
TP0_CTX3_3D_CPS_REG = 6,
TP0_TMO_DATA = 9,
TP0_SMO_DATA = 10,
TP0_MIPMAP_BASE_DATA = 11,
SP_INST_DATA_3 = 31,
SP_NCTX_REG = 32,
SP_CTX0_3D_CVS_REG = 33,
SP_CTX0_3D_CPS_REG = 34,
SP_CTX1_3D_CVS_REG = 35,
SP_CTX1_3D_CPS_REG = 36,
SP_CTX2_3D_CPS_REG = 37,
SP_CTX3_3D_CPS_REG = 38,
SP_INST_DATA = 39,
SP_INST_DATA_1 = 40,
SP_LB_0_DATA = 41,
SP_LB_1_DATA = 42,
SP_LB_2_DATA = 43,
SP_LB_3_DATA = 44,
SP_LB_4_DATA = 45,
SP_LB_5_DATA = 46,
SP_LB_6_DATA = 47,
SP_LB_7_DATA = 48,
SP_CB_RAM = 49,
SP_LB_13_DATA = 50,
SP_LB_14_DATA = 51,
SP_INST_TAG = 52,
SP_INST_DATA_2 = 53,
SP_TMO_TAG = 54,
SP_SMO_TAG = 55,
SP_STATE_DATA = 56,
SP_HWAVE_RAM = 57,
SP_L0_INST_BUF = 58,
SP_LB_8_DATA = 59,
SP_LB_9_DATA = 60,
SP_LB_10_DATA = 61,
SP_LB_11_DATA = 62,
SP_LB_12_DATA = 63,
HLSQ_DATAPATH_DSTR_META = 64,
HLSQ_DESC_REMAP_META = 65,
HLSQ_SLICE_TOP_META = 66,
HLSQ_L2STC_TAG_RAM = 67,
HLSQ_L2STC_INFO_CMD = 68,
HLSQ_CVS_BE_CTXT_BUF_RAM_TAG = 69,
HLSQ_CPS_BE_CTXT_BUF_RAM_TAG = 70,
HLSQ_GFX_CVS_BE_CTXT_BUF_RAM = 71,
HLSQ_GFX_CPS_BE_CTXT_BUF_RAM = 72,
HLSQ_CHUNK_CVS_RAM = 73,
HLSQ_CHUNK_CPS_RAM = 74,
HLSQ_CHUNK_CVS_RAM_TAG = 75,
HLSQ_CHUNK_CPS_RAM_TAG = 76,
HLSQ_ICB_CVS_CB_BASE_TAG = 77,
HLSQ_ICB_CPS_CB_BASE_TAG = 78,
HLSQ_CVS_MISC_RAM = 79,
HLSQ_CPS_MISC_RAM = 80,
HLSQ_CPS_MISC_RAM_1 = 81,
HLSQ_INST_RAM = 82,
HLSQ_GFX_CVS_CONST_RAM = 83,
HLSQ_GFX_CPS_CONST_RAM = 84,
HLSQ_CVS_MISC_RAM_TAG = 85,
HLSQ_CPS_MISC_RAM_TAG = 86,
HLSQ_INST_RAM_TAG = 87,
HLSQ_GFX_CVS_CONST_RAM_TAG = 88,
HLSQ_GFX_CPS_CONST_RAM_TAG = 89,
HLSQ_GFX_LOCAL_MISC_RAM = 90,
HLSQ_GFX_LOCAL_MISC_RAM_TAG = 91,
HLSQ_INST_RAM_1 = 92,
HLSQ_STPROC_META = 93,
HLSQ_SLICE_BACKEND_META = 94,
HLSQ_INST_RAM_2 = 95,
HLSQ_DATAPATH_META = 96,
HLSQ_FRONTEND_META = 97,
HLSQ_INDIRECT_META = 98,
HLSQ_BACKEND_META = 99,
};
struct gen8_snapshot_block_list {
/* pre_crashdumper_regs : Registers which need to be dumped before CD runs */
struct gen8_reg_list *pre_crashdumper_regs;
/* num_pre_crashdumper_regs : Number of registers which need to be dumped before CD runs */
size_t num_pre_crashdumper_regs;
/* debugbus_blocks : List of debugbus blocks */
const u32 *debugbus_blocks;
/* debugbus_blocks_len : Length of the debugbus list */
size_t debugbus_blocks_len;
/* gbif_debugbus_blocks : List of GBIF debugbus blocks */
const u32 *gbif_debugbus_blocks;
/* gbif_debugbus_blocks_len : Length of GBIF debugbus list */
size_t gbif_debugbus_blocks_len;
/* cx_debugbus_blocks : List of CX debugbus blocks */
const u32 *cx_debugbus_blocks;
/* cx_debugbus_blocks_len : Length of the CX debugbus list */
size_t cx_debugbus_blocks_len;
/* external_core_regs : List of external core registers */
const u32 **external_core_regs;
/* num_external_core_regs : length of external core registers list */
size_t num_external_core_regs;
/* gmu_cx_unsliced_regs : List of GMU CX unsliced registers */
const u32 *gmu_cx_unsliced_regs;
/* gmu_gx_regs : List of GMU GX registers */
struct gen8_reg_list *gmu_gx_regs;
/* num_gmu_gx_regs : Length of GMU registers list */
size_t num_gmu_gx_regs;
/* rscc_regs : List of RSCC registers */
const u32 *rscc_regs;
/* reg_list : List of GPU internal registers */
struct gen8_reg_list *reg_list;
/* cx_misc_regs : List of cx_misc registers */
const u32 *cx_misc_regs;
/* shader_blocks : List of GPU shader memory */
struct gen8_shader_block *shader_blocks;
/* num_shader_blocks : Length of the shader memory list */
size_t num_shader_blocks;
/* cp_cluster_registers : List of GPU CP cluster registers */
struct gen8_cluster_registers *cp_clusters;
/* num_cp_clusters : Length of GPU CP cluster registers list */
size_t num_cp_clusters;
/* cluster_registers : List of GPU cluster registers */
struct gen8_cluster_registers *clusters;
/* num_clusters : Length of GPU cluster registers list */
size_t num_clusters;
/* sptp_cluster_registers : List of GPU SPTP cluster registers */
struct gen8_sptp_cluster_registers *sptp_clusters;
/* num_sptp_clusters : Length of GPU SPTP cluster registers list */
size_t num_sptp_clusters;
/* post_crashdumper_regs : Registers which need to be dumped after CD runs */
const u32 *post_crashdumper_regs;
/* index_registers : List of index_registers */
struct gen8_cp_indexed_reg *index_registers;
/* index_registers_len : Length of the index registers */
size_t index_registers_len;
/* mempool_index_registers : List of CP mempool_index_registers */
struct gen8_cp_indexed_reg *mempool_index_registers;
/* mempool_index_registers_len : Length of the mempool index registers */
size_t mempool_index_registers_len;
};
#endif /*__ADRENO_GEN8_SNAPSHOT_H */

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,273 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2022-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _ADRENO_HWSCHED_H_
#define _ADRENO_HWSCHED_H_
#include <linux/soc/qcom/msm_hw_fence.h>
#include "kgsl_sync.h"
/* This structure represents inflight command object */
struct cmd_list_obj {
/** @drawobj: Handle to the draw object */
struct kgsl_drawobj *drawobj;
/** @node: List node to put it in the list of inflight commands */
struct list_head node;
};
/**
* struct adreno_hw_fence_entry - A structure to store hardware fence and the context
*/
struct adreno_hw_fence_entry {
/** @cmd: H2F_MSG_HW_FENCE_INFO packet for this hardware fence */
struct hfi_hw_fence_info cmd;
/** @kfence: Pointer to the kgsl fence */
struct kgsl_sync_fence *kfence;
/** @drawctxt: Pointer to the context */
struct adreno_context *drawctxt;
/** @node: list node to add it to a list */
struct list_head node;
/** @reset_node: list node to add it to post reset list of hardware fences */
struct list_head reset_node;
};
/**
* struct adreno_hwsched_ops - Function table to hook hwscheduler things
* to target specific routines
*/
struct adreno_hwsched_ops {
/**
* @submit_drawobj - Target specific function to submit IBs to hardware
*/
int (*submit_drawobj)(struct adreno_device *adreno_dev,
struct kgsl_drawobj *drawobj);
/**
* @preempt_count - Target specific function to get preemption count
*/
u32 (*preempt_count)(struct adreno_device *adreno_dev);
/**
* @create_hw_fence - Target specific function to create a hardware fence
*/
void (*create_hw_fence)(struct adreno_device *adreno_dev,
struct kgsl_sync_fence *kfence);
};
/**
* struct adreno_hw_fence - Container for hardware fences instance
*/
struct adreno_hw_fence {
/** @handle: Handle for hardware fences */
void *handle;
/** @descriptor: Memory descriptor for hardware fences */
struct msm_hw_fence_mem_addr mem_descriptor;
/** @memdesc: Kgsl memory descriptor for hardware fences queue */
struct kgsl_memdesc memdesc;
};
/**
* struct adreno_hwsched - Container for the hardware scheduler
*/
struct adreno_hwsched {
/** @mutex: Mutex needed to run dispatcher function */
struct mutex mutex;
/** @flags: Container for the dispatcher internal flags */
unsigned long flags;
/** @inflight: Number of active submissions to the dispatch queues */
u32 inflight;
/** @jobs: Array of dispatch job lists for each priority level */
struct llist_head jobs[16];
/** @requeue: Array of lists for dispatch jobs that got requeued */
struct llist_head requeue[16];
/** @work: The work structure to execute dispatcher function */
struct kthread_work work;
/** @cmd_list: List of objects submitted to dispatch queues */
struct list_head cmd_list;
/** @fault: Atomic to record a fault */
atomic_t fault;
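/** @worker: Kthread worker on which the dispatcher @work is queued */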
struct kthread_worker *worker;
/** @hwsched_ops: Container for target specific hwscheduler ops */
const struct adreno_hwsched_ops *hwsched_ops;
/** @ctxt_bad: Container for the context bad hfi packet */
void *ctxt_bad;
/** @idle_gate: Gate to wait on for hwscheduler to idle */
struct completion idle_gate;
/** @big_cmdobj: Points to the big IB that is inflight */
struct kgsl_drawobj_cmd *big_cmdobj;
/** @recurring_cmdobj: Recurring command object sent to GMU */
struct kgsl_drawobj_cmd *recurring_cmdobj;
/** @lsr_timer: Timer struct to schedule lsr work */
struct timer_list lsr_timer;
/** @lsr_check_ws: Lsr work to update power stats */
struct work_struct lsr_check_ws;
/** @hw_fence: Container for the hw fences instance */
struct adreno_hw_fence hw_fence;
/** @hw_fence_cache: kmem cache for storing hardware output fences */
struct kmem_cache *hw_fence_cache;
/** @hw_fence_count: Number of hardware fences that haven't yet been sent to Tx Queue */
atomic_t hw_fence_count;
/**
* @submission_seqnum: Sequence number for sending submissions to GMU context queues or
* dispatch queues
*/
atomic_t submission_seqnum;
/** @global_ctxtq: Memory descriptor for global context queue */
struct kgsl_memdesc global_ctxtq;
/** @global_ctxt_gmu_registered: Whether global context is registered with gmu */
bool global_ctxt_gmu_registered;
};
/*
* This value is based on maximum number of IBs that can fit
* in the ringbuffer.
*/
#define HWSCHED_MAX_IBS 2000
enum adreno_hwsched_flags {
ADRENO_HWSCHED_POWER = 0,
ADRENO_HWSCHED_ACTIVE,
ADRENO_HWSCHED_CTX_BAD_LEGACY,
ADRENO_HWSCHED_CONTEXT_QUEUE,
ADRENO_HWSCHED_HW_FENCE,
};
/**
* adreno_hwsched_trigger - Function to schedule the hwsched thread
* @adreno_dev: A handle to adreno device
*
* Schedule the hw dispatcher for retiring and submitting command objects
*/
void adreno_hwsched_trigger(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_start() - activate the hwsched dispatcher
* @adreno_dev: pointer to the adreno device
*
* Enable dispatcher thread to execute
*/
void adreno_hwsched_start(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_init() - Initialize the hwsched
* @adreno_dev: pointer to the adreno device
* @hwsched_ops: Pointer to target specific hwsched ops
*
* Set up the hwsched resources.
* Return: 0 on success or negative on failure.
*/
int adreno_hwsched_init(struct adreno_device *adreno_dev,
const struct adreno_hwsched_ops *hwsched_ops);
/**
* adreno_hwsched_fault - Set hwsched fault to request recovery
* @adreno_dev: A handle to adreno device
* @fault: The type of fault
*/
void adreno_hwsched_fault(struct adreno_device *adreno_dev, u32 fault);
/**
* adreno_hwsched_clear_fault() - Clear the hwsched fault
* @adreno_dev: A pointer to an adreno_device structure
*
* Clear the hwsched fault status for adreno device
*/
void adreno_hwsched_clear_fault(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_parse_fault_cmdobj - Parse the faulty submission
* @adreno_dev: pointer to the adreno device
* @snapshot: Pointer to the snapshot structure
*
* Walk the list of active submissions to find the one that faulted and
* parse it so that relevant command buffers can be added to the snapshot
*/
void adreno_hwsched_parse_fault_cmdobj(struct adreno_device *adreno_dev,
struct kgsl_snapshot *snapshot);
void adreno_hwsched_flush(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_unregister_contexts - Reset context gmu_registered bit
* @adreno_dev: pointer to the adreno device
*
* Walk the list of contexts and reset the gmu_registered for all
* contexts
*/
void adreno_hwsched_unregister_contexts(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_idle - Wait for dispatcher and hardware to become idle
* @adreno_dev: A handle to adreno device
*
* Return: 0 on success or negative error on failure
*/
int adreno_hwsched_idle(struct adreno_device *adreno_dev);
void adreno_hwsched_retire_cmdobj(struct adreno_hwsched *hwsched,
struct kgsl_drawobj_cmd *cmdobj);
bool adreno_hwsched_context_queue_enabled(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_register_hw_fence - Register GPU as a hardware fence client
* @adreno_dev: pointer to the adreno device
*
* Register with the hardware fence driver to be able to trigger and wait
* for hardware fences. Also, set up the memory descriptor for mapping the
* client queue to the GMU.
*/
void adreno_hwsched_register_hw_fence(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_deregister_hw_fence - Deregister GPU as a hardware fence client
* @adreno_dev: pointer to the adreno device
*
* Deregister with the hardware fence driver and free up any resources allocated
* as part of registering with the hardware fence driver
*/
void adreno_hwsched_deregister_hw_fence(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_replay - Resubmit inflight cmdbatches after gpu reset
* @adreno_dev: pointer to the adreno device
*
* Resubmit all cmdbatches to GMU after device reset
*/
void adreno_hwsched_replay(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_parse_payload - Parse payload to look up a key
* @payload: Pointer to a payload section
* @key: The key whose value is to be looked up
*
* This function parses the payload data which is a sequence
* of key-value pairs.
*
* Return: The value of the key or 0 if key is not found
*/
u32 adreno_hwsched_parse_payload(struct payload_section *payload, u32 key);
/**
* adreno_hwsched_gpu_fault - Gets hwsched gpu fault info
* @adreno_dev: pointer to the adreno device
*
* Return: Zero if there is no hwsched gpu fault, else the non-zero fault value
*/
u32 adreno_hwsched_gpu_fault(struct adreno_device *adreno_dev);
/**
* adreno_hwsched_log_nonfatal_gpu_fault - Logs non fatal GPU error from context bad hfi packet
* @adreno_dev: pointer to the adreno device
* @dev: Pointer to the struct device for the GMU platform device
* @error: Type of error reported in the context bad HFI packet
*
* This function parses context bad hfi packet and logs error information.
*
* Return: True if the error is non-fatal, false otherwise.
*/
bool adreno_hwsched_log_nonfatal_gpu_fault(struct adreno_device *adreno_dev,
struct device *dev, u32 error);
#endif

View File

@ -0,0 +1,313 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2002,2007-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/slab.h>
#include "adreno.h"
#include "adreno_a5xx.h"
/*
* Add a perfcounter to the per-fd list.
* Call with the device mutex held
*/
static int adreno_process_perfcounter_add(struct kgsl_device_private *dev_priv,
unsigned int groupid, unsigned int countable)
{
struct adreno_device_private *adreno_priv = container_of(dev_priv,
struct adreno_device_private, dev_priv);
struct adreno_perfcounter_list_node *perfctr;
perfctr = kmalloc(sizeof(*perfctr), GFP_KERNEL);
if (!perfctr)
return -ENOMEM;
perfctr->groupid = groupid;
perfctr->countable = countable;
/* add the pair to process perfcounter list */
list_add(&perfctr->node, &adreno_priv->perfcounter_list);
return 0;
}
/*
* Remove a perfcounter from the per-fd list.
* Call with the device mutex held
*/
static int adreno_process_perfcounter_del(struct kgsl_device_private *dev_priv,
unsigned int groupid, unsigned int countable)
{
struct adreno_device_private *adreno_priv = container_of(dev_priv,
struct adreno_device_private, dev_priv);
struct adreno_perfcounter_list_node *p;
list_for_each_entry(p, &adreno_priv->perfcounter_list, node) {
if (p->groupid == groupid && p->countable == countable) {
list_del(&p->node);
kfree(p);
return 0;
}
}
return -ENODEV;
}
long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
struct kgsl_device *device = dev_priv->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_perfcounter_get *get = data;
int result;
mutex_lock(&device->mutex);
/*
* adreno_perfcounter_get() is called by kernel clients
* during start(), so it is not safe to take an
* active count inside that function.
*/
result = adreno_perfcntr_active_oob_get(adreno_dev);
if (result) {
mutex_unlock(&device->mutex);
return (long)result;
}
result = adreno_perfcounter_get(adreno_dev,
get->groupid, get->countable, &get->offset,
&get->offset_hi, PERFCOUNTER_FLAG_NONE);
/* Add the perfcounter into the list */
if (!result) {
result = adreno_process_perfcounter_add(dev_priv, get->groupid,
get->countable);
if (result)
adreno_perfcounter_put(adreno_dev, get->groupid,
get->countable, PERFCOUNTER_FLAG_NONE);
}
adreno_perfcntr_active_oob_put(adreno_dev);
mutex_unlock(&device->mutex);
return (long) result;
}
long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
struct kgsl_device *device = dev_priv->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_perfcounter_put *put = data;
int result;
mutex_lock(&device->mutex);
/* Delete the perfcounter from the process list */
result = adreno_process_perfcounter_del(dev_priv, put->groupid,
put->countable);
/* Put the perfcounter refcount */
if (!result)
adreno_perfcounter_put(adreno_dev, put->groupid,
put->countable, PERFCOUNTER_FLAG_NONE);
mutex_unlock(&device->mutex);
return (long) result;
}
static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_perfcounter_query *query = data;
return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid,
query->countables, query->count, &query->max_counters);
}
static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_perfcounter_read *read = data;
/*
* When performance counter zapping is enabled, the counters are cleared
* across context switches. Reading the counters when they are zapped is
* not permitted.
*/
if (!adreno_dev->perfcounter)
return -EPERM;
return (long) adreno_perfcounter_read_group(adreno_dev, read->reads,
read->count);
}
static long adreno_ioctl_preemption_counters_query(
struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_preemption_counters_query *read = data;
int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
int levels_to_copy;
if (!adreno_is_a5xx(adreno_dev) ||
!adreno_is_preemption_enabled(adreno_dev))
return -EOPNOTSUPP;
if (read->size_user < size_level)
return -EINVAL;
/* Calculate number of preemption counter levels to copy to userspace */
levels_to_copy = (read->size_user / size_level);
levels_to_copy = min_t(int, levels_to_copy,
ARRAY_SIZE(adreno_dev->ringbuffers));
if (copy_to_user(u64_to_user_ptr(read->counters),
adreno_dev->preempt.scratch->hostptr,
levels_to_copy * size_level))
return -EFAULT;
read->max_priority_level = levels_to_copy;
read->size_priority_level = size_level;
return 0;
}
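/*
 * Illustrative example: with four ringbuffer priority levels and a
 * userspace buffer large enough for only three levels (size_user ==
 * 3 * size_level), three levels are copied from the preemption scratch
 * buffer and max_priority_level is reported back as 3.
 */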
static long adreno_ioctl_read_calibrated_ts(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_read_calibrated_timestamps *reads = data;
unsigned long flags;
u32 *sources = NULL;
u64 *ts = NULL;
u64 start;
u64 samples[KGSL_CALIBRATED_TIME_DOMAIN_MAX] = {0};
u32 i;
int ret = 0;
/* Reading calibrated timestamps requires the CX timer be initialized */
if (!test_bit(ADRENO_DEVICE_CX_TIMER_INITIALIZED, &adreno_dev->priv))
return -EOPNOTSUPP;
/* Check that the number of timestamps is reasonable */
if (!reads->count ||
(reads->count > (2 * KGSL_CALIBRATED_TIME_DOMAIN_MAX)))
return -EINVAL;
sources = kvcalloc(reads->count, sizeof(*sources), GFP_KERNEL);
if (!sources)
return -ENOMEM;
if (copy_from_user(sources, u64_to_user_ptr(reads->sources),
reads->count * sizeof(*sources))) {
ret = -EFAULT;
goto done;
}
for (i = 0; i < reads->count; i++) {
if (sources[i] >= KGSL_CALIBRATED_TIME_DOMAIN_MAX) {
ret = -EINVAL;
goto done;
}
}
ts = kvcalloc(reads->count, sizeof(*ts), GFP_KERNEL);
if (!ts) {
ret = -ENOMEM;
goto done;
}
/* Disable local irqs to prevent context switch delays */
local_irq_save(flags);
/* Sample the MONOTONIC_RAW domain for use in calculating deviation */
start = (u64)ktime_to_ns(ktime_get_raw());
samples[KGSL_CALIBRATED_TIME_DOMAIN_DEVICE] =
adreno_read_cx_timer(adreno_dev);
samples[KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC] =
(u64)ktime_to_ns(ktime_get());
samples[KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC_RAW] =
(u64)ktime_to_ns(ktime_get_raw());
/* Done collecting timestamps. Re-enable irqs */
local_irq_restore(flags);
/* Calculate deviation in reads based on the MONOTONIC_RAW samples */
reads->deviation = samples[KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC_RAW] - start;
for (i = 0; i < reads->count; i++)
ts[i] = samples[sources[i]];
if (copy_to_user(u64_to_user_ptr(reads->ts), ts, reads->count * sizeof(*ts)))
ret = -EFAULT;
done:
kvfree(ts);
kvfree(sources);
return ret;
}
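/*
 * Illustrative example: a request with count == 2 and sources[] =
 * { KGSL_CALIBRATED_TIME_DOMAIN_DEVICE, KGSL_CALIBRATED_TIME_DOMAIN_MONOTONIC }
 * returns ts[0] from the CX counter and ts[1] from CLOCK_MONOTONIC, both
 * captured back to back with local interrupts off; reads->deviation is the
 * MONOTONIC_RAW delta across the whole sampling window, which bounds how
 * far apart the returned timestamps can be.
 */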
long adreno_ioctl_helper(struct kgsl_device_private *dev_priv,
unsigned int cmd, unsigned long arg,
const struct kgsl_ioctl *cmds, int len)
{
unsigned char data[128] = { 0 };
long ret;
int i;
for (i = 0; i < len; i++) {
if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd))
break;
}
if (i == len)
return -ENOIOCTLCMD;
if (_IOC_SIZE(cmds[i].cmd) > sizeof(data)) {
dev_err_ratelimited(dev_priv->device->dev,
"data too big for ioctl 0x%08x: %d/%zu\n",
cmd, _IOC_SIZE(cmds[i].cmd), sizeof(data));
return -EINVAL;
}
if (_IOC_SIZE(cmds[i].cmd)) {
ret = kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data);
if (ret)
return ret;
} else {
memset(data, 0, sizeof(data));
}
ret = cmds[i].func(dev_priv, cmd, data);
if (ret == 0 && _IOC_SIZE(cmds[i].cmd))
ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data);
return ret;
}
static struct kgsl_ioctl adreno_ioctl_funcs[] = {
{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
{ IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put },
{ IOCTL_KGSL_PERFCOUNTER_QUERY, adreno_ioctl_perfcounter_query },
{ IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read },
{ IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY,
adreno_ioctl_preemption_counters_query },
{ IOCTL_KGSL_READ_CALIBRATED_TIMESTAMPS, adreno_ioctl_read_calibrated_ts },
};
long adreno_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, unsigned long arg)
{
return adreno_ioctl_helper(dev_priv, cmd, arg,
adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs));
}
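/*
 * Illustrative usage sketch (hypothetical, compiled out): a target-specific
 * ioctl handler could register its own kgsl_ioctl table and fall back to
 * the common adreno table when the helper reports -ENOIOCTLCMD. The
 * my_target_* names below are examples only and do not exist in this
 * driver.
 */
#if 0
static const struct kgsl_ioctl my_target_ioctl_funcs[] = {
{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
};

static long my_target_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, unsigned long arg)
{
long ret = adreno_ioctl_helper(dev_priv, cmd, arg,
my_target_ioctl_funcs, ARRAY_SIZE(my_target_ioctl_funcs));

/* Not in the target-specific table: try the common adreno ioctls */
if (ret == -ENOIOCTLCMD)
ret = adreno_ioctl(dev_priv, cmd, arg);

return ret;
}
#endif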

Some files were not shown because too many files have changed in this diff