drm/amdgpu: Fix ecc irq enable/disable unpaired

[ Upstream commit a32c6f7f5737cc7e31cd7ad5133f0d96fca12ea6 ]

The ecc_irq is disabled while GPU mode2 reset suspending process,
but not be enabled during GPU mode2 reset resume process.

Changed from V1:
	only do sdma/gfx ras_late_init in aldebaran_mode2_restore_ip
	delete amdgpu_ras_late_resume function

Changed from V2:
	check umc ras supported before put ecc_irq

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Stanley.Yang 2023-12-15 16:13:23 +08:00 committed by Greg Kroah-Hartman
parent aade0a0760
commit adae24c5b3
4 changed files with 38 additions and 1 deletions

View File

@ -333,6 +333,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
{
struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
struct amdgpu_ras *con;
int r;
if (reset_device_list == NULL)
@ -358,7 +359,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
*/
amdgpu_register_gpu_instance(tmp_adev);
/* Resume RAS */
/* Resume RAS, ecc_irq */
con = amdgpu_ras_get_context(tmp_adev);
if (!amdgpu_sriov_vf(tmp_adev) && con) {
if (tmp_adev->sdma.ras &&
tmp_adev->sdma.ras->ras_block.ras_late_init) {
r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
&tmp_adev->sdma.ras->ras_block.ras_comm);
if (r) {
dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
goto end;
}
}
if (tmp_adev->gfx.ras &&
tmp_adev->gfx.ras->ras_block.ras_late_init) {
r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
&tmp_adev->gfx.ras->ras_block.ras_comm);
if (r) {
dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
goto end;
}
}
}
amdgpu_ras_resume(tmp_adev);
/* Update PSP FW topology after reset */

View File

@ -1144,6 +1144,10 @@ static int gmc_v10_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
if (adev->gmc.ecc_irq.funcs &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
return 0;
}

View File

@ -951,6 +951,11 @@ static int gmc_v11_0_hw_fini(void *handle)
}
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
if (adev->gmc.ecc_irq.funcs &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
gmc_v11_0_gart_disable(adev);
return 0;

View File

@ -1900,6 +1900,10 @@ static int gmc_v9_0_hw_fini(void *handle)
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
if (adev->gmc.ecc_irq.funcs &&
amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
return 0;
}