[PATCH] drm/amdkfd: CWSR with software scheduler
Kasiviswanathan, Harish
Harish.Kasiviswanathan at amd.com
Wed Aug 11 16:32:11 UTC 2021
[AMD Official Use Only]
-----Original Message-----
From: Joshi, Mukul <Mukul.Joshi at amd.com>
Sent: Wednesday, August 11, 2021 10:57 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Cornwall, Jay <Jay.Cornwall at amd.com>
Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler
[AMD Official Use Only]
> -----Original Message-----
> From: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>
> Sent: Tuesday, August 10, 2021 9:15 PM
> To: Joshi, Mukul <Mukul.Joshi at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Cornwall, Jay
> <Jay.Cornwall at amd.com>; Joshi, Mukul <Mukul.Joshi at amd.com>
> Subject: RE: [PATCH] drm/amdkfd: CWSR with software scheduler
>
> [AMD Official Use Only]
>
> Just few comments inline. With that acknowledged Reviewed-by: Harish
> Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> Mukul Joshi
> Sent: Monday, August 9, 2021 4:41 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Cornwall, Jay
> <Jay.Cornwall at amd.com>; Joshi, Mukul <Mukul.Joshi at amd.com>
> Subject: [PATCH] drm/amdkfd: CWSR with software scheduler
>
> This patch adds support to program trap handler settings when loading
> driver with software scheduler (sched_policy=2).
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> Suggested-by: Jay Cornwall <Jay.Cornwall at amd.com>
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 31 +++++++++++++++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 31 +++++++++++++++++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 33
> ++++++++++++++++++- .../drm/amd/amdkfd/kfd_device_queue_manager.c |
> 20 +++++++++--
> .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 ++
> 5 files changed, 115 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 491acdf92f73..960acf68150a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -560,6 +560,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd,
> void *mqd,
> case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
> type = RESET_WAVES;
> break;
> + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> + type = SAVE_WAVES;
> + break;
> default:
> type = DRAIN_PIPE;
> break;
> @@ -754,6 +757,33 @@ static void set_vm_context_page_table_base(struct
> kgd_dev *kgd, uint32_t vmid,
> adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); }
>
> +static void program_trap_handler_settings(struct kgd_dev *kgd,
> + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) {
> + struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> + lock_srbm(kgd, 0, 0, 0, vmid);
> +
> + /*
> + * Program TBA registers
> + */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> + lower_32_bits(tba_addr >> 8));
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> + upper_32_bits(tba_addr >> 8) |
> + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
> +
> + /*
> + * Program TMA registers
> + */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> + lower_32_bits(tma_addr >> 8));
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> + upper_32_bits(tma_addr >> 8));
> +
> + unlock_srbm(kgd);
> +}
> +
> const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> .program_sh_mem_settings = kgd_program_sh_mem_settings,
> .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -774,4
> +804,5 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> .get_atc_vmid_pasid_mapping_info =
> get_atc_vmid_pasid_mapping_info,
> .set_vm_context_page_table_base =
> set_vm_context_page_table_base,
> + .program_trap_handler_settings = program_trap_handler_settings,
>
> [HK]: Naming not consistent. program_trap_handler_settings,
> program_trap_handler_settings_v10_3 and
> kgd_gfx_v9_program_trap_handler_settings
>
I am following the naming convention for the other functions defined in these files.
That's the reason the functions names are different. I don't mind naming them all the same.
[HK]: Thanks for clarifying. I am fine with as it is.
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 1f5620cc3570..dac0d751d5af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -537,6 +537,9 @@ static int hqd_destroy_v10_3(struct kgd_dev *kgd,
> void *mqd,
> case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
> type = RESET_WAVES;
> break;
> + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> + type = SAVE_WAVES;
> + break;
> default:
> type = DRAIN_PIPE;
> break;
> @@ -658,6 +661,33 @@ static void
> set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v
> adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); }
>
> +static void program_trap_handler_settings_v10_3(struct kgd_dev *kgd,
> + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) {
> + struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> + lock_srbm(kgd, 0, 0, 0, vmid);
> +
> + /*
> + * Program TBA registers
> + */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> + lower_32_bits(tba_addr >> 8));
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> + upper_32_bits(tba_addr >> 8) |
> + (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT));
> +
> + /*
> + * Program TMA registers
> + */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> + lower_32_bits(tma_addr >> 8));
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> + upper_32_bits(tma_addr >> 8));
> +
> + unlock_srbm(kgd);
> +}
> +
> #if 0
> uint32_t enable_debug_trap_v10_3(struct kgd_dev *kgd,
> uint32_t trap_debug_wave_launch_mode, @@
> -820,6 +850,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
> .address_watch_get_offset = address_watch_get_offset_v10_3,
> .get_atc_vmid_pasid_mapping_info = NULL,
> .set_vm_context_page_table_base =
> set_vm_context_page_table_base_v10_3,
> + .program_trap_handler_settings =
> program_trap_handler_settings_v10_3,
> #if 0
> .enable_debug_trap = enable_debug_trap_v10_3,
> .disable_debug_trap = disable_debug_trap_v10_3, diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index ed3014fbb563..154244916727 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -42,7 +42,8 @@
> enum hqd_dequeue_request_type {
> NO_ACTION = 0,
> DRAIN_PIPE,
> - RESET_WAVES
> + RESET_WAVES,
> + SAVE_WAVES
> };
>
> static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev
> *kgd) @@ -566,6 +567,9 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev
> *kgd, void *mqd,
> case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
> type = RESET_WAVES;
> break;
> + case KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> + type = SAVE_WAVES;
> + break;
> default:
> type = DRAIN_PIPE;
> break;
> @@ -878,6 +882,32 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev
> *kgd, int pasid,
> adev->gfx.cu_info.max_waves_per_simd;
> }
>
> +static void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd,
> + uint32_t vmid, uint64_t tba_addr, uint64_t
> +tma_addr) {
> + struct amdgpu_device *adev = get_amdgpu_device(kgd);
> +
> + lock_srbm(kgd, 0, 0, 0, vmid);
> +
> + /*
> + * Program TBA registers
> + */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO),
> + lower_32_bits(tba_addr >> 8));
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI),
> + upper_32_bits(tba_addr >> 8));
> +
> + /*
> + * Program TMA registers
> + */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO),
> + lower_32_bits(tma_addr >> 8));
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI),
> + upper_32_bits(tma_addr >> 8));
> +
> + unlock_srbm(kgd);
> +}
> +
> const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
> .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -899,4 +929,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
> .set_vm_context_page_table_base =
> kgd_gfx_v9_set_vm_context_page_table_base,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> + .program_trap_handler_settings =
> +kgd_gfx_v9_program_trap_handler_settings,
> };
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index a972ef5eae68..6fd6b2248992 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -211,6 +211,14 @@ static void deallocate_doorbell(struct
> qcm_process_device *qpd,
> WARN_ON(!old);
> }
>
> +static void program_trap_handler_settings(struct device_queue_manager
> *dqm,
> + struct qcm_process_device *qpd)
> +{
> + return dqm->dev->kfd2kgd->program_trap_handler_settings(
> + dqm->dev->kgd, qpd->vmid,
> + qpd->tba_addr, qpd-
> >tma_addr);
>
> [HK]: Since, this is not defined for all GFX generations, safer to
> check if function pointer is not NULL. I see it is called only VEGA10+
>
Sure I can add that. However, I am making sure that the function is defined for all ASICs after VEGA10+.
Regards,
Mukul
> +}
> +
> static int allocate_vmid(struct device_queue_manager *dqm,
> struct qcm_process_device *qpd,
> struct queue *q)
> @@ -241,6 +249,10 @@ static int allocate_vmid(struct
> device_queue_manager *dqm,
>
> program_sh_mem_settings(dqm, qpd);
>
> + if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
> + dqm->dev->cwsr_enabled)
> + program_trap_handler_settings(dqm, qpd);
> +
> /* qpd->page_table_base is set earlier when register_process()
> * is called, i.e. when the first queue is created.
> */
> @@ -582,7 +594,9 @@ static int update_queue(struct
> device_queue_manager *dqm, struct queue *q)
> }
>
> retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
> - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> + (dqm->dev->cwsr_enabled?
> + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
> KFD_UNMAP_LATENCY_MS, q->pipe, q-
> >queue);
> if (retval) {
> pr_err("destroy mqd failed\n");
> @@ -675,7 +689,9 @@ static int evict_process_queues_nocpsch(struct
> device_queue_manager *dqm,
> continue;
>
> retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
> - KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> + (dqm->dev->cwsr_enabled?
> + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE:
> + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
> KFD_UNMAP_LATENCY_MS, q->pipe, q-
> >queue);
> if (retval && !ret)
> /* Return the first error, but keep going to diff --git
> a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 95c656d205ed..c84bd7b2cf59 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -44,6 +44,7 @@ struct kgd_mem;
> enum kfd_preempt_type {
> KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
> KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
> + KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
> };
>
> struct kfd_vm_fault_info {
> @@ -298,6 +299,8 @@ struct kfd2kgd_calls {
>
> void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int
> *wave_cnt,
> int *max_waves_per_cu);
> + void (*program_trap_handler_settings)(struct kgd_dev *kgd,
> + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
> };
>
> #endif /* KGD_KFD_INTERFACE_H_INCLUDED */
> --
> 2.17.1
More information about the amd-gfx
mailing list