diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f42c548f8421..627bb5093317 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5242,6 +5242,219 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
 	aux_domain_remove_dev(to_dmar_domain(domain), dev);
 }
 
+/*
+ * 2D array for converting and sanitizing IOMMU generic TLB granularity to
+ * VT-d granularity. Invalidation is typically included in the unmap operation
+ * as a result of DMA or VFIO unmap. However, for assigned devices the guest
+ * owns the first level page tables. Invalidations of translation caches in the
+ * guest are trapped and passed down to the host.
+ *
+ * vIOMMU in the guest will only expose first level page tables, therefore
+ * we do not support IOTLB granularity for request without PASID (second level).
+ *
+ * For example, to find the VT-d granularity encoding for IOTLB
+ * type and page selective granularity within PASID:
+ * X: indexed by iommu cache type, i.e. the bit position of the
+ *    IOMMU_CACHE_INV_TYPE_* flag
+ * Y: indexed by enum iommu_inv_granularity
+ * [ilog2(IOMMU_CACHE_INV_TYPE_IOTLB)][IOMMU_INV_GRANU_ADDR]
+ */
+
+static const int
+inv_type_granu_table[IOMMU_CACHE_INV_TYPE_NR][IOMMU_INV_GRANU_NR] = {
+	/*
+	 * PASID based IOTLB invalidation: PASID selective (per PASID),
+	 * page selective (address granularity)
+	 */
+	{-EINVAL, QI_GRAN_NONG_PASID, QI_GRAN_PSI_PASID},
+	/* PASID based dev TLBs */
+	{-EINVAL, -EINVAL, QI_DEV_IOTLB_GRAN_PASID_SEL},
+	/* PASID cache */
+	{-EINVAL, -EINVAL, -EINVAL}
+};
+
+/*
+ * Translate a generic cache type / granularity pair into the VT-d
+ * granularity encoding held in inv_type_granu_table[].
+ *
+ * @type:  bit position of the IOMMU_CACHE_INV_TYPE_* flag
+ * @granu: generic invalidation granularity taken from the request
+ *
+ * Returns the table entry, or -EINVAL when the combination is unsupported
+ * or either index falls outside the table.
+ */
+static inline int to_vtd_granularity(int type, int granu)
+{
+	/* granu comes from the request; never index past the table. */
+	if (type < 0 || type >= IOMMU_CACHE_INV_TYPE_NR ||
+	    granu < 0 || granu >= IOMMU_INV_GRANU_NR)
+		return -EINVAL;
+
+	return inv_type_granu_table[type][granu];
+}
+
+/*
+ * Convert an invalidation range into the VT-d size encoding.
+ *
+ * VT-d size is encoded as 2^size of 4K pages, 0 for 4k, 9 for 2MB, etc.
+ * IOMMU cache invalidate API passes granu_size in bytes, and number of
+ * granu size in contiguous memory.
+ */
+static inline u64 to_vtd_size(u64 granu_size, u64 nr_granules)
+{
+	u64 nr_pages = (granu_size * nr_granules) >> VTD_PAGE_SHIFT;
+
+	return order_base_2(nr_pages);
+}
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+/*
+ * Flush VT-d translation caches as described by an invalidation request
+ * passed down for a PCI device attached to a nesting-mode domain.
+ *
+ * Every cache type set in inv_info->cache is handled in turn: the PASID
+ * based IOTLB is flushed (plus the device TLB when ATS is enabled), or the
+ * device TLB alone.
+ *
+ * Returns 0 on success, -EINVAL for a bad request, a non-nested domain or
+ * an unsupported cache type / granularity pair, -ENODEV when @dev is not a
+ * PCI device behind an IOMMU, and -ERANGE when the range is too large or
+ * the address is not aligned to it.
+ */
+static int
+intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
+			   struct iommu_cache_invalidate_info *inv_info)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct device_domain_info *info;
+	struct intel_iommu *iommu;
+	unsigned long flags;
+	int cache_type;
+	u8 bus, devfn;
+	u16 did, sid;
+	int ret = 0;
+	u64 size = 0;
+
+	if (!inv_info || !dmar_domain ||
+	    inv_info->version != IOMMU_CACHE_INVALIDATE_INFO_VERSION_1)
+		return -EINVAL;
+
+	if (!dev || !dev_is_pci(dev))
+		return -ENODEV;
+
+	iommu = device_to_iommu(dev, &bus, &devfn);
+	if (!iommu)
+		return -ENODEV;
+
+	if (!(dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE))
+		return -EINVAL;
+
+	spin_lock_irqsave(&device_domain_lock, flags);
+	spin_lock(&iommu->lock);
+	info = dev->archdata.iommu;
+	if (!info) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	did = dmar_domain->iommu_did[iommu->seq_id];
+	sid = PCI_DEVID(bus, devfn);
+
+	/* Size is only valid in address selective invalidation */
+	if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
+		size = to_vtd_size(inv_info->addr_info.granule_size,
+				   inv_info->addr_info.nb_granules);
+		/*
+		 * VTD_PAGE_SHIFT + size is used as a shift count below;
+		 * reject orders that would shift past the word width.
+		 */
+		if (size >= BITS_PER_LONG - VTD_PAGE_SHIFT) {
+			ret = -ERANGE;
+			goto out_unlock;
+		}
+	}
+
+	for_each_set_bit(cache_type,
+			 (unsigned long *)&inv_info->cache,
+			 IOMMU_CACHE_INV_TYPE_NR) {
+		int granu = 0;
+		u64 pasid = 0;
+
+		granu = to_vtd_granularity(cache_type, inv_info->granularity);
+		if (granu == -EINVAL) {
+			pr_err_ratelimited("Invalid cache type and granu combination %d/%d\n",
+					   cache_type, inv_info->granularity);
+			/* Report the rejected request instead of returning 0. */
+			ret = -EINVAL;
+			break;
+		}
+
+		/*
+		 * PASID is stored in different locations based on the
+		 * granularity.
+		 */
+		if (inv_info->granularity == IOMMU_INV_GRANU_PASID &&
+		    (inv_info->pasid_info.flags & IOMMU_INV_PASID_FLAGS_PASID))
+			pasid = inv_info->pasid_info.pasid;
+		else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+			 (inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_PASID))
+			pasid = inv_info->addr_info.pasid;
+
+		switch (BIT(cache_type)) {
+		case IOMMU_CACHE_INV_TYPE_IOTLB:
+			if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
+			    size &&
+			    (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
+				pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+						   inv_info->addr_info.addr, size);
+				ret = -ERANGE;
+				goto out_unlock;
+			}
+
+			/*
+			 * If granu is PASID-selective, address is ignored.
+			 * We use npages = -1 to indicate that.
+			 */
+			qi_flush_piotlb(iommu, did, pasid,
+					mm_to_dma_pfn(inv_info->addr_info.addr),
+					(granu == QI_GRAN_NONG_PASID) ? -1 : 1UL << size,
+					inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+
+			/*
+			 * Always flush device IOTLB if ATS is enabled. vIOMMU
+			 * in the guest may assume IOTLB flush is inclusive,
+			 * which is more efficient.
+			 */
+			if (info->ats_enabled)
+				qi_flush_dev_iotlb_pasid(iommu, sid,
+						info->pfsid, pasid,
+						info->ats_qdep,
+						inv_info->addr_info.addr,
+						size, granu);
+			break;
+		case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+			if (info->ats_enabled)
+				qi_flush_dev_iotlb_pasid(iommu, sid,
+						info->pfsid, pasid,
+						info->ats_qdep,
+						inv_info->addr_info.addr,
+						size, granu);
+			else
+				pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
+			break;
+		default:
+			dev_err_ratelimited(dev, "Unsupported IOMMU invalidation type %d\n",
+					    cache_type);
+			ret = -EINVAL;
+		}
+	}
+out_unlock:
+	spin_unlock(&iommu->lock);
+	spin_unlock_irqrestore(&device_domain_lock, flags);
+
+	return ret;
+}
+#endif
+
 static int intel_iommu_map(struct iommu_domain *domain,
 			   unsigned long iova, phys_addr_t hpa,
 			   size_t size, int iommu_prot, gfp_t gfp)
@@ -5781,6 +5994,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.def_domain_type	= device_def_domain_type,
 	.pgsize_bitmap		= INTEL_IOMMU_PGSIZES,
 #ifdef CONFIG_INTEL_IOMMU_SVM
+	.cache_invalidate	= intel_iommu_sva_invalidate,
 	.sva_bind_gpasid	= intel_svm_bind_gpasid,
 	.sva_unbind_gpasid	= intel_svm_unbind_gpasid,
 #endif