[PATCH 2/2] drm/amdkfd: map SVM range with correct access permission
Felix Kuehling
felix.kuehling at amd.com
Thu Aug 19 21:32:38 UTC 2021
On 2021-08-19 10:56 a.m., Philip Yang wrote:
> When restoring a retry-fault or prefetch range, or restoring an svm
> range after eviction, map the range to the GPU with the correct read
> or write access permission.
>
> A range may include multiple VMAs. Update the GPU page table with the
> offset into prange and the number of pages for each VMA, according to
> that VMA's access permission.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Minor nitpicks, and one question. See inline. It looks good otherwise.
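
For readers skimming the diff, the core structural change is that
svm_range_validate_and_map() now walks the range one VMA at a time,
roughly like this (simplified sketch, error handling omitted; not the
exact patch code):

	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;
	for (addr = start; addr < end && !r; addr = next) {
		vma = find_vma(mm, addr);               /* VMA backing this chunk */
		readonly = !(vma->vm_flags & VM_WRITE);
		next = min(vma->vm_end, end);           /* clamp chunk to range end */
		npages = (next - addr) >> PAGE_SHIFT;
		offset = (addr - start) >> PAGE_SHIFT;  /* page offset into prange */
		/* get pages, dma-map, and map to GPUs for
		 * [offset, offset + npages) with the VMA's permission */
	}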
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 131 +++++++++++++++++----------
> 1 file changed, 84 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index cf1009bb532a..94612581963f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
>
> static int
> svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
> + unsigned long offset, unsigned long npages,
> unsigned long *hmm_pfns, uint32_t gpuidx)
> {
> enum dma_data_direction dir = DMA_BIDIRECTIONAL;
> @@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
> prange->dma_addr[gpuidx] = addr;
> }
>
> - for (i = 0; i < prange->npages; i++) {
> + addr += offset;
> + for (i = 0; i < npages; i++) {
> if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
> "leaking dma mapping\n"))
> dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
> @@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>
> static int
> svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
> + unsigned long offset, unsigned long npages,
> unsigned long *hmm_pfns)
> {
> struct kfd_process *p;
> @@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
> }
> adev = (struct amdgpu_device *)pdd->dev->kgd;
>
> - r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
> + r = svm_range_dma_map_dev(adev, prange, offset, npages,
> + hmm_pfns, gpuidx);
> if (r)
> break;
> }
> @@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
> pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
>
> pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
> -
> - pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
> - prange->svms, prange->start, prange->last,
> - (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
> -
> return pte_flags;
> }
>
> @@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
>
> static int
> svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> - struct svm_range *prange, dma_addr_t *dma_addr,
> + struct svm_range *prange, unsigned long offset,
> + unsigned long npages, bool readonly, dma_addr_t *dma_addr,
> struct amdgpu_device *bo_adev, struct dma_fence **fence)
> {
> struct amdgpu_bo_va bo_va;
> @@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> int r = 0;
> int64_t i;
>
> - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
> - prange->last);
> + last_start = prange->start + offset;
> +
> + pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
> + last_start, last_start + npages - 1, readonly);
>
> if (prange->svm_bo && prange->ttm_res)
> bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
>
> - last_start = prange->start;
> - for (i = 0; i < prange->npages; i++) {
> + for (i = offset; i < offset + npages; i++) {
> last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
> dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
> if ((prange->start + i) < prange->last &&
> @@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
> last_start, prange->start + i, last_domain ? "GPU" : "CPU");
> +
> pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
> - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
> - last_start,
> + if (readonly)
> + pte_flags &= ~AMDGPU_PTE_WRITEABLE;
> +
> + pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
> + prange->svms, last_start, prange->start + i,
> + (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
> + pte_flags);
> +
> + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
> + NULL, last_start,
> prange->start + i, pte_flags,
> last_start - prange->start,
> - NULL,
> - dma_addr,
> + NULL, dma_addr,
> &vm->last_update,
> &table_freed);
> if (r) {
> @@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> return r;
> }
>
> -static int svm_range_map_to_gpus(struct svm_range *prange,
> - unsigned long *bitmap, bool wait)
> +static int
> +svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
> + unsigned long npages, bool readonly,
> + unsigned long *bitmap, bool wait)
> {
> struct kfd_process_device *pdd;
> struct amdgpu_device *bo_adev;
> @@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
> }
>
> r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
> - prange, prange->dma_addr[gpuidx],
> + prange, offset, npages, readonly,
> + prange->dma_addr[gpuidx],
> bo_adev, wait ? &fence : NULL);
> if (r)
> break;
> @@ -1390,6 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
> int32_t gpuidx, bool intr, bool wait)
> {
> struct svm_validate_context ctx;
> + unsigned long start, end, addr;
> struct hmm_range *hmm_range;
> struct kfd_process *p;
> void *owner;
> @@ -1448,40 +1461,64 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
> break;
> }
> }
> - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> - prange->start << PAGE_SHIFT,
> - prange->npages, &hmm_range,
> - false, true, owner);
> - if (r) {
> - pr_debug("failed %d to get svm range pages\n", r);
> - goto unreserve_out;
> - }
>
> - r = svm_range_dma_map(prange, ctx.bitmap,
> - hmm_range->hmm_pfns);
> - if (r) {
> - pr_debug("failed %d to dma map range\n", r);
> - goto unreserve_out;
> - }
> + start = prange->start << PAGE_SHIFT;
> + end = (prange->last + 1) << PAGE_SHIFT;
> + for (addr = start; addr < end && !r; ) {
> + struct vm_area_struct *vma;
> + unsigned long next;
> + unsigned long offset;
> + unsigned long npages;
> + bool readonly;
>
> - prange->validated_once = true;
> + vma = find_vma(mm, addr);
> + if (!vma || addr < vma->vm_start) {
> + r = -EINVAL;
I think -EFAULT would be the appropriate error code here.
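I.e., a sketch of the suggested change:

	vma = find_vma(mm, addr);
	if (!vma || addr < vma->vm_start) {
		r = -EFAULT;	/* no VMA backs this address */
		goto unreserve_out;
	}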
> + goto unreserve_out;
> + }
> + readonly = !(vma->vm_flags & VM_WRITE);
>
> - svm_range_lock(prange);
> - if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> - pr_debug("hmm update the range, need validate again\n");
> - r = -EAGAIN;
> - goto unlock_out;
> - }
> - if (!list_empty(&prange->child_list)) {
> - pr_debug("range split by unmap in parallel, validate again\n");
> - r = -EAGAIN;
> - goto unlock_out;
> - }
> + next = min(vma->vm_end, end);
> + npages = (next - addr) / PAGE_SIZE;
Use >> PAGE_SHIFT for consistency.
> + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> + addr, npages, &hmm_range,
> + readonly, true, owner);
> + if (r) {
> + pr_debug("failed %d to get svm range pages\n", r);
> + goto unreserve_out;
> + }
>
> - r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
> + offset = (addr - start) / PAGE_SIZE;
Same as above, use >> PAGE_SHIFT.
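With both divisions converted, the two computations would read (sketch):

	npages = (next - addr) >> PAGE_SHIFT;
	...
	offset = (addr - start) >> PAGE_SHIFT;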
> + r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
> + hmm_range->hmm_pfns);
> + if (r) {
> + pr_debug("failed %d to dma map range\n", r);
> + goto unreserve_out;
> + }
> +
> + svm_range_lock(prange);
> + if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> + pr_debug("hmm update the range, need validate again\n");
> + r = -EAGAIN;
> + goto unlock_out;
> + }
> + if (!list_empty(&prange->child_list)) {
> + pr_debug("range split by unmap in parallel, validate again\n");
> + r = -EAGAIN;
> + goto unlock_out;
> + }
> +
> + r = svm_range_map_to_gpus(prange, offset, npages, readonly,
> + ctx.bitmap, wait);
>
> unlock_out:
> - svm_range_unlock(prange);
> + svm_range_unlock(prange);
> +
> + addr = next;
> + }
> +
> + prange->validated_once = true;
Should this be conditional on "!r"?
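Something like (sketch):

	if (!r)
		prange->validated_once = true;

so that a loop that bailed out early doesn't mark the range as
validated.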
Regards,
Felix
> +
> unreserve_out:
> svm_range_unreserve_bos(&ctx);
>