From 321bd212619a7269308696e4ddc446930ea73fad Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Jun 2020 18:24:33 -0400 Subject: [PATCH 01/71] virtio: VIRTIO_F_IOMMU_PLATFORM -> VIRTIO_F_ACCESS_PLATFORM Rename the bit to match latest virtio spec. Add a compat macro to avoid breaking existing userspace. Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand --- arch/um/drivers/virtio_uml.c | 2 +- drivers/vdpa/ifcvf/ifcvf_base.h | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 ++-- drivers/vhost/net.c | 4 ++-- drivers/vhost/vdpa.c | 2 +- drivers/virtio/virtio_balloon.c | 2 +- drivers/virtio/virtio_ring.c | 2 +- include/linux/virtio_config.h | 2 +- include/uapi/linux/virtio_config.h | 10 +++++++--- tools/virtio/linux/virtio_config.h | 2 +- 10 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 351aee52aca6..a6c4bb6c2c01 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -385,7 +385,7 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) } break; case VHOST_USER_SLAVE_IOTLB_MSG: - /* not supported - VIRTIO_F_IOMMU_PLATFORM */ + /* not supported - VIRTIO_F_ACCESS_PLATFORM */ case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ default: diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index f4554412e607..24af422b5a3e 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -29,7 +29,7 @@ (1ULL << VIRTIO_F_VERSION_1) | \ (1ULL << VIRTIO_NET_F_STATUS) | \ (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ - (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | \ (1ULL << VIRTIO_NET_F_MRG_RXBUF)) /* Only one queue pair for now. 
*/ diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c7334cc65bb2..a9bc5e0fb353 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -55,7 +55,7 @@ struct vdpasim_virtqueue { static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM); + (1ULL << VIRTIO_F_ACCESS_PLATFORM); /* State of each vdpasim device */ struct vdpasim { @@ -450,7 +450,7 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) struct vdpasim *vdpasim = vdpa_to_sim(vdpa); /* DMA mapping must be done by driver */ - if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) + if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) return -EINVAL; vdpasim->features = features & vdpasim_features; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e992decfec53..8e0921d3805d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -73,7 +73,7 @@ enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM) + (1ULL << VIRTIO_F_ACCESS_PLATFORM) }; enum { @@ -1653,7 +1653,7 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) !vhost_log_access_ok(&n->dev)) goto out_unlock; - if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) { + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { if (vhost_init_device_iotlb(&n->dev, true)) goto out_unlock; } diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a54b60d6623f..18869a35d408 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -31,7 +31,7 @@ enum { (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_IOMMU_PLATFORM) | + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | (1ULL << VIRTIO_F_RING_PACKED) | (1ULL << VIRTIO_F_ORDER_PLATFORM) | (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | diff --git 
a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8be02f333b7a..54fd989f9353 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -1129,7 +1129,7 @@ static int virtballoon_validate(struct virtio_device *vdev) else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING); - __virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM); + __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM); return 0; } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 58b96baa8d48..a1a5c2a91426 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2225,7 +2225,7 @@ void vring_transport_features(struct virtio_device *vdev) break; case VIRTIO_F_VERSION_1: break; - case VIRTIO_F_IOMMU_PLATFORM: + case VIRTIO_F_ACCESS_PLATFORM: break; case VIRTIO_F_RING_PACKED: break; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index bb4cc4910750..f2cc2a0df174 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -171,7 +171,7 @@ static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) * Note the reverse polarity of the quirk feature (compared to most * other features), this is for compatibility with legacy systems. */ - return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } static inline diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index ff8e7dc9d4dd..b5eda06f0d57 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -67,13 +67,17 @@ #define VIRTIO_F_VERSION_1 32 /* - * If clear - device has the IOMMU bypass quirk feature. - * If set - use platform tools to detect the IOMMU. + * If clear - device has the platform DMA (e.g. IOMMU) bypass quirk feature. + * If set - use platform DMA tools to access the memory. 
* * Note the reverse polarity (compared to most other features), * this is for compatibility with legacy systems. */ -#define VIRTIO_F_IOMMU_PLATFORM 33 +#define VIRTIO_F_ACCESS_PLATFORM 33 +#ifndef __KERNEL__ +/* Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) */ +#define VIRTIO_F_IOMMU_PLATFORM VIRTIO_F_ACCESS_PLATFORM +#endif /* __KERNEL__ */ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index dbf14c1e2188..f99ae42668e0 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -51,7 +51,7 @@ static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) * Note the reverse polarity of the quirk feature (compared to most * other features), this is for compatibility with legacy systems. */ - return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } static inline bool virtio_is_little_endian(struct virtio_device *vdev) From 24b6842ade6925199e182988259761504aacfbc0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 24 Jun 2020 19:17:04 -0400 Subject: [PATCH 02/71] virtio: virtio_has_iommu_quirk -> virtio_has_dma_quirk Now that the corresponding feature bit has been renamed, rename the quirk too - it's about special ways to do DMA, not necessarily about the IOMMU. Signed-off-by: Michael S. 
Tsirkin --- drivers/gpu/drm/virtio/virtgpu_object.c | 2 +- drivers/gpu/drm/virtio/virtgpu_vq.c | 4 ++-- drivers/virtio/virtio_ring.c | 2 +- include/linux/virtio_config.h | 4 ++-- tools/virtio/linux/virtio_config.h | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 6ccbd01cd888..e8799ab0c753 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -141,7 +141,7 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, struct virtio_gpu_mem_entry **ents, unsigned int *nents) { - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); struct scatterlist *sg; int si, ret; diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 9e663a5d9952..53af60d484a4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -599,7 +599,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]); struct virtio_gpu_transfer_to_host_2d *cmd_p; struct virtio_gpu_vbuffer *vbuf; - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); if (use_dma_api) @@ -1015,7 +1015,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]); struct virtio_gpu_transfer_host_3d *cmd_p; struct virtio_gpu_vbuffer *vbuf; - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); + bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); if (use_dma_api) diff --git a/drivers/virtio/virtio_ring.c 
b/drivers/virtio/virtio_ring.c index a1a5c2a91426..34253cb69cb8 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -240,7 +240,7 @@ static inline bool virtqueue_use_indirect(struct virtqueue *_vq, static bool vring_use_dma_api(struct virtio_device *vdev) { - if (!virtio_has_iommu_quirk(vdev)) + if (!virtio_has_dma_quirk(vdev)) return true; /* Otherwise, we are left to guess. */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index f2cc2a0df174..3b4eae5ac5e3 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -162,10 +162,10 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev, } /** - * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device */ -static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) { /* * Note the reverse polarity of the quirk feature (compared to most diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index f99ae42668e0..f2640e505c4e 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -42,10 +42,10 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev, (__virtio_test_bit((dev), feature)) /** - * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * virtio_has_dma_quirk - determine whether this device has the DMA quirk * @vdev: the device */ -static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) { /* * Note the reverse polarity of the quirk feature (compared to most From 8875bbba97087bf4a677071723d04fc00730f1e7 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Fri, 10 Jul 2020 06:34:49 -0400 Subject: [PATCH 03/71] virtio_balloon: fix sparse warning balloon uses virtio32_to_cpu instead of cpu_to_virtio32 to convert a native endian number to virtio. No practical difference but makes sparse warn. Fix it up. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck --- drivers/virtio/virtio_balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 54fd989f9353..8bc1704ffdf3 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -600,7 +600,7 @@ static int send_cmd_id_start(struct virtio_balloon *vb) while (virtqueue_get_buf(vq, &unused)) ; - vb->cmd_id_active = virtio32_to_cpu(vb->vdev, + vb->cmd_id_active = cpu_to_virtio32(vb->vdev, virtio_balloon_cmd_id_received(vb)); sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); From 5487196878bc926a1ee15069c13aa48b9a894fab Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 06:46:04 -0400 Subject: [PATCH 04/71] virtio_ring: sparse warning fixup virtio_store_mb was built with split ring in mind so it accepts __virtio16 arguments. Packed ring uses __le16 values, so sparse complains. It's just a store with some barriers so let's convert it to a macro, we don't lose too much type safety by doing that. Signed-off-by: Michael S. 
Tsirkin Acked-by: Cornelia Huck --- include/linux/virtio_ring.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 3dc70adfe5f5..b485b13fa50b 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -46,16 +46,15 @@ static inline void virtio_wmb(bool weak_barriers) dma_wmb(); } -static inline void virtio_store_mb(bool weak_barriers, - __virtio16 *p, __virtio16 v) -{ - if (weak_barriers) { - virt_store_mb(*p, v); - } else { - WRITE_ONCE(*p, v); - mb(); - } -} +#define virtio_store_mb(weak_barriers, p, v) \ +do { \ + if (weak_barriers) { \ + virt_store_mb(*p, v); \ + } else { \ + WRITE_ONCE(*p, v); \ + mb(); \ + } \ +} while (0) \ struct virtio_device; struct virtqueue; From a4235ec06acf05c58081700cda02dcd480d9e9cb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 03:20:21 -0400 Subject: [PATCH 05/71] virtio: allow __virtioXX, __leXX in config space Currently all config space fields are of the type __uXX. This confuses people and some drivers (notably vdpa) access them using CPU endian-ness - which only works well for legacy or LE platforms. Update virtio_cread/virtio_cwrite macros to allow __virtioXX and __leXX field types. Follow-up patches will convert config space to use these types. Signed-off-by: Michael S. 
Tsirkin Acked-by: Cornelia Huck --- include/linux/virtio_config.h | 50 +++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 3b4eae5ac5e3..64da491936f7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -6,6 +6,7 @@ #include #include #include +#include #include struct irq_affinity; @@ -287,12 +288,57 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } +/* + * Only the checker differentiates between __virtioXX and __uXX types. But we + * try to share as much code as we can with the regular GCC build. + */ +#if !defined(CONFIG_CC_IS_GCC) && !defined(__CHECKER__) + +/* Not a checker - we can keep things simple */ +#define __virtio_native_typeof(x) typeof(x) + +#else + +/* + * We build this out of a couple of helper macros in a vain attempt to + * help you keep your lunch down while reading it. 
+ */ +#define __virtio_pick_value(x, type, then, otherwise) \ + __builtin_choose_expr(__same_type(x, type), then, otherwise) + +#define __virtio_pick_type(x, type, then, otherwise) \ + __virtio_pick_value(x, type, (then)0, otherwise) + +#define __virtio_pick_endian(x, x16, x32, x64, otherwise) \ + __virtio_pick_type(x, x16, __u16, \ + __virtio_pick_type(x, x32, __u32, \ + __virtio_pick_type(x, x64, __u64, \ + otherwise))) + +#define __virtio_native_typeof(x) typeof( \ + __virtio_pick_type(x, __u8, __u8, \ + __virtio_pick_endian(x, __virtio16, __virtio32, __virtio64, \ + __virtio_pick_endian(x, __le16, __le32, __le64, \ + __virtio_pick_endian(x, __u16, __u32, __u64, \ + /* No other type allowed */ \ + (void)0))))) + +#endif + +#define __virtio_native_type(structname, member) \ + __virtio_native_typeof(((structname*)0)->member) + +#define __virtio_typecheck(structname, member, val) \ + /* Must match the member's type, and be integer */ \ + typecheck(__virtio_native_type(structname, member), (val)) + + /* Config space accessors. */ #define virtio_cread(vdev, structname, member, ptr) \ do { \ might_sleep(); \ /* Must match the member's type, and be integer */ \ - if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \ + if (!__virtio_typecheck(structname, member, *(ptr))) \ (*ptr) = 1; \ \ switch (sizeof(*ptr)) { \ @@ -322,7 +368,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) do { \ might_sleep(); \ /* Must match the member's type, and be integer */ \ - if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \ + if (!__virtio_typecheck(structname, member, *(ptr))) \ BUG_ON((*ptr) == 1); \ \ switch (sizeof(*ptr)) { \ From cae19a6386c86dec8ec2810a96d7497a2eec8d38 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 06/71] virtio_9p: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_9p.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/virtio_9p.h b/include/uapi/linux/virtio_9p.h index 277c4ad44e84..441047432258 100644 --- a/include/uapi/linux/virtio_9p.h +++ b/include/uapi/linux/virtio_9p.h @@ -25,7 +25,7 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ -#include +#include #include #include @@ -36,7 +36,7 @@ struct virtio_9p_config { /* length of the tag name */ - __u16 tag_len; + __virtio16 tag_len; /* non-NULL terminated tag name */ __u8 tag[0]; } __attribute__((packed)); From c73cb10cc4421baa669c1edd8211086280273216 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 07/71] virtio_balloon: correct tags for config space fields Tag config space fields as having little endian-ness. Note that balloon is special: LE even when using the legacy interface. Signed-off-by: Michael S. Tsirkin Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_balloon.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index dc3e656470dd..ddaa45e723c4 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -45,20 +45,20 @@ #define VIRTIO_BALLOON_CMD_ID_DONE 1 struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ - __u32 num_pages; + __le32 num_pages; /* Number of pages we've actually got in balloon. */ - __u32 actual; + __le32 actual; /* * Free page hint command id, readonly by guest. * Was previously named free_page_report_cmd_id so we * need to carry that name for legacy support. 
*/ union { - __u32 free_page_hint_cmd_id; - __u32 free_page_report_cmd_id; /* deprecated */ + __le32 free_page_hint_cmd_id; + __le32 free_page_report_cmd_id; /* deprecated */ }; /* Stores PAGE_POISON if page poisoning is in use */ - __u32 poison_val; + __le32 poison_val; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ From 40e04c488bd6ab1859778d40bf9bb3ca294ab97b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 08/71] virtio_blk: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck Reviewed-by: Stefano Garzarella Reviewed-by: Stefano Garzarella --- include/uapi/linux/virtio_blk.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 0f99d7b49ede..d888f013d9ff 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -57,20 +57,20 @@ struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ - __u64 capacity; + __virtio64 capacity; /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ - __u32 size_max; + __virtio32 size_max; /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ - __u32 seg_max; + __virtio32 seg_max; /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */ struct virtio_blk_geometry { - __u16 cylinders; + __virtio16 cylinders; __u8 heads; __u8 sectors; } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ - __u32 blk_size; + __virtio32 blk_size; /* the next 4 entries are guarded by VIRTIO_BLK_F_TOPOLOGY */ /* exponent for physical block per logical block. */ @@ -78,42 +78,42 @@ struct virtio_blk_config { /* alignment offset in logical blocks. */ __u8 alignment_offset; /* minimum I/O size without performance penalty in logical blocks. 
*/ - __u16 min_io_size; + __virtio16 min_io_size; /* optimal sustained I/O size in logical blocks. */ - __u32 opt_io_size; + __virtio32 opt_io_size; /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ __u8 wce; __u8 unused; /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */ - __u16 num_queues; + __virtio16 num_queues; /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */ /* * The maximum discard sectors (in 512-byte sectors) for * one segment. */ - __u32 max_discard_sectors; + __virtio32 max_discard_sectors; /* * The maximum number of discard segments in a * discard command. */ - __u32 max_discard_seg; + __virtio32 max_discard_seg; /* Discard commands must be aligned to this number of sectors. */ - __u32 discard_sector_alignment; + __virtio32 discard_sector_alignment; /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */ /* * The maximum number of write zeroes sectors (in 512-byte sectors) in * one segment. */ - __u32 max_write_zeroes_sectors; + __virtio32 max_write_zeroes_sectors; /* * The maximum number of segments in a write zeroes * command. */ - __u32 max_write_zeroes_seg; + __virtio32 max_write_zeroes_seg; /* * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the * deallocation of one or more of the sectors. From dbe2dc8c5838ef415620a4fe691570b062ab046f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 09/71] virtio_console: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_console.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/virtio_console.h b/include/uapi/linux/virtio_console.h index b7fb108c9310..7e6ec2ff0560 100644 --- a/include/uapi/linux/virtio_console.h +++ b/include/uapi/linux/virtio_console.h @@ -45,13 +45,13 @@ struct virtio_console_config { /* colums of the screens */ - __u16 cols; + __virtio16 cols; /* rows of the screens */ - __u16 rows; + __virtio16 rows; /* max. number of ports this device can hold */ - __u32 max_nr_ports; + __virtio32 max_nr_ports; /* emergency write register */ - __u32 emerg_wr; + __virtio32 emerg_wr; } __attribute__((packed)); /* From 24bcf35b695ef5228f3035eb979cc2de571d560b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 10/71] virtio_crypto: correct tags for config space fields Since crypto is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_crypto.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 50cdc8aebfcf..a03932f10565 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -414,33 +414,33 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_config { /* See VIRTIO_CRYPTO_OP_* above */ - __u32 status; + __le32 status; /* * Maximum number of data queue */ - __u32 max_dataqueues; + __le32 max_dataqueues; /* * Specifies the services mask which the device support, * see VIRTIO_CRYPTO_SERVICE_* above */ - __u32 crypto_services; + __le32 crypto_services; /* Detailed algorithms mask */ - __u32 cipher_algo_l; - __u32 cipher_algo_h; - __u32 hash_algo; - __u32 mac_algo_l; - __u32 mac_algo_h; - __u32 aead_algo; + __le32 cipher_algo_l; + __le32 cipher_algo_h; + __le32 hash_algo; + __le32 mac_algo_l; + __le32 mac_algo_h; + __le32 aead_algo; /* Maximum length of cipher key */ - __u32 max_cipher_key_len; + __le32 max_cipher_key_len; /* Maximum length of authenticated key */ - __u32 max_auth_key_len; - __u32 reserve; + __le32 max_auth_key_len; + __le32 reserve; /* Maximum size of each crypto request's content */ - __u64 max_size; + __le64 max_size; }; struct virtio_crypto_inhdr { From fc4a1accbb4ef372bb55b7ab161cf88e3b631935 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 11/71] virtio_fs: correct tags for config space fields Since fs is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. 
Tsirkin Acked-by: Vivek Goyal Acked-by: Vivek Goyal Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h index b02eb2ac3d99..3056b6e9f8ce 100644 --- a/include/uapi/linux/virtio_fs.h +++ b/include/uapi/linux/virtio_fs.h @@ -13,7 +13,7 @@ struct virtio_fs_config { __u8 tag[36]; /* Number of request queues */ - __u32 num_request_queues; + __le32 num_request_queues; } __attribute__((packed)); #endif /* _UAPI_LINUX_VIRTIO_FS_H */ From f378444b7c97e39358de5d50d01fb0e92f259073 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 12/71] virtio_gpu: correct tags for config space fields Since gpu is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_gpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0c85914d9369..ccbd174ef321 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -320,10 +320,10 @@ struct virtio_gpu_resp_edid { #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) struct virtio_gpu_config { - __u32 events_read; - __u32 events_clear; - __u32 num_scanouts; - __u32 num_capsets; + __le32 events_read; + __le32 events_clear; + __le32 num_scanouts; + __le32 num_capsets; }; /* simple formats for fbcon/X use */ From 924b59a6dfa85dc9eac4c7f2fe1857bba2cb2510 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 13/71] virtio_input: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Gerd Hoffmann Reviewed-by: Gerd Hoffmann Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_input.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h index a7fe5c8fb135..52084b1fb965 100644 --- a/include/uapi/linux/virtio_input.h +++ b/include/uapi/linux/virtio_input.h @@ -40,18 +40,18 @@ enum virtio_input_config_select { }; struct virtio_input_absinfo { - __u32 min; - __u32 max; - __u32 fuzz; - __u32 flat; - __u32 res; + __le32 min; + __le32 max; + __le32 fuzz; + __le32 flat; + __le32 res; }; struct virtio_input_devids { - __u16 bustype; - __u16 vendor; - __u16 product; - __u16 version; + __le16 bustype; + __le16 vendor; + __le16 product; + __le16 version; }; struct virtio_input_config { From 0ebcffcc2731682777bab19b51a512d8f31e1bdd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 14/71] virtio_iommu: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jean-Philippe Brucker Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_iommu.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 48e3c29223b5..237e36a280cb 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -18,24 +18,24 @@ #define VIRTIO_IOMMU_F_MMIO 5 struct virtio_iommu_range_64 { - __u64 start; - __u64 end; + __le64 start; + __le64 end; }; struct virtio_iommu_range_32 { - __u32 start; - __u32 end; + __le32 start; + __le32 end; }; struct virtio_iommu_config { /* Supported page sizes */ - __u64 page_size_mask; + __le64 page_size_mask; /* Supported IOVA range */ struct virtio_iommu_range_64 input_range; /* Max domain ID size */ struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ - __u32 probe_size; + __le32 probe_size; }; /* Request types */ From 79268954424771185fb4ca304786dd561a272246 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 15/71] virtio_mem: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. TODO: check other uses of __virtioXX types in this header, should probably be __leXX. Signed-off-by: Michael S. Tsirkin Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_mem.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index a9ffe041843c..70e01c687d5e 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -185,27 +185,27 @@ struct virtio_mem_resp { struct virtio_mem_config { /* Block size and alignment. Cannot change. */ - __u64 block_size; + __le64 block_size; /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. 
*/ - __u16 node_id; + __le16 node_id; __u8 padding[6]; /* Start address of the memory region. Cannot change. */ - __u64 addr; + __le64 addr; /* Region size (maximum). Cannot change. */ - __u64 region_size; + __le64 region_size; /* * Currently usable region size. Can grow up to region_size. Can * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config * update will be sent). */ - __u64 usable_region_size; + __le64 usable_region_size; /* * Currently used size. Changes due to plug/unplug requests, but no * config updates will be sent. */ - __u64 plugged_size; + __le64 plugged_size; /* Requested size. New plug requests cannot exceed it. Can change. */ - __u64 requested_size; + __le64 requested_size; }; #endif /* _LINUX_VIRTIO_MEM_H */ From 577e677a785357542311a645eeb1756cd83988be Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 16/71] virtio_net: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 19d23e5baa4e..27d996f29dd1 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -87,19 +87,19 @@ struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[ETH_ALEN]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ - __u16 status; + __virtio16 status; /* Maximum number of each of transmit and receive queues; * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. * Legal values are between 1 and 0x8000 */ - __u16 max_virtqueue_pairs; + __virtio16 max_virtqueue_pairs; /* Default maximum transmit unit advice */ - __u16 mtu; + __virtio16 mtu; /* * speed, in units of 1Mb. All values 0 to INT_MAX are legal. * Any other value stands for unknown. 
*/ - __u32 speed; + __virtio32 speed; /* * 0x00 - half duplex * 0x01 - full duplex From a28feb855cc0ad452acd3dfe2b8f2841927da5f1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 17/71] virtio_pmem: correct tags for config space fields Since this is a modern-only device, tag config space fields as having little endian-ness. Signed-off-by: Michael S. Tsirkin Reviewed-by: Cornelia Huck --- include/uapi/linux/virtio_pmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h index b022787ffb94..d676b3620383 100644 --- a/include/uapi/linux/virtio_pmem.h +++ b/include/uapi/linux/virtio_pmem.h @@ -15,8 +15,8 @@ #include struct virtio_pmem_config { - __u64 start; - __u64 size; + __le64 start; + __le64 size; }; #define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 From 965b5350514b597dc6347b733127e180844aeb43 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:17:13 -0400 Subject: [PATCH 18/71] virtio_scsi: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck --- drivers/scsi/virtio_scsi.c | 4 ++-- include/uapi/linux/virtio_scsi.h | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 0e0910c5b942..c36aeb9a1330 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -746,14 +746,14 @@ static struct scsi_host_template virtscsi_host_template = { #define virtscsi_config_get(vdev, fld) \ ({ \ - typeof(((struct virtio_scsi_config *)0)->fld) __val; \ + __virtio_native_type(struct virtio_scsi_config, fld) __val; \ virtio_cread(vdev, struct virtio_scsi_config, fld, &__val); \ __val; \ }) #define virtscsi_config_set(vdev, fld, val) \ do { \ - typeof(((struct virtio_scsi_config *)0)->fld) __val = (val); \ + __virtio_native_type(struct virtio_scsi_config, fld) __val = (val); \ virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ } while(0) diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h index cc18ef8825c0..0abaae4027c0 100644 --- a/include/uapi/linux/virtio_scsi.h +++ b/include/uapi/linux/virtio_scsi.h @@ -103,16 +103,16 @@ struct virtio_scsi_event { } __attribute__((packed)); struct virtio_scsi_config { - __u32 num_queues; - __u32 seg_max; - __u32 max_sectors; - __u32 cmd_per_lun; - __u32 event_info_size; - __u32 sense_size; - __u32 cdb_size; - __u16 max_channel; - __u16 max_target; - __u32 max_lun; + __virtio32 num_queues; + __virtio32 seg_max; + __virtio32 max_sectors; + __virtio32 cmd_per_lun; + __virtio32 event_info_size; + __virtio32 sense_size; + __virtio32 cdb_size; + __virtio16 max_channel; + __virtio16 max_target; + __virtio32 max_lun; } __attribute__((packed)); /* Feature Bits */ From 4a04cfb0eb5e00432f9ff978f2b81bd1736e85db Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Fri, 10 Jul 2020 07:55:52 -0400 Subject: [PATCH 19/71] virtio_config: disallow native type fields Transitional devices should all use __virtioXX types (and __leXX for fields not present in legacy devices). Modern ones should use __leXX. __uXX type would be a bug. Let's prevent that. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 64da491936f7..c68f58f3bf34 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -319,9 +319,8 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) __virtio_pick_type(x, __u8, __u8, \ __virtio_pick_endian(x, __virtio16, __virtio32, __virtio64, \ __virtio_pick_endian(x, __le16, __le32, __le64, \ - __virtio_pick_endian(x, __u16, __u32, __u64, \ - /* No other type allowed */ \ - (void)0))))) + /* No other type allowed */ \ + (void)0)))) #endif From 03bea764bf61c9f9918324bda7362616024386e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 12 Jul 2020 10:56:34 -0400 Subject: [PATCH 20/71] mlxbf-tmfifo: sparse tags for config access mlxbf-tmfifo accesses config space using native types - which works for it since legacy virtio uses native-endian types. This will break if it ever needs to support modern virtio, so with new tags previously introduced for virtio net config, sparse now warns for this in drivers. Since this is a legacy only device, fix it up using virtio_legacy_is_little_endian for now. No functional changes. Signed-off-by: Michael S.
Tsirkin Acked-by: Cornelia Huck Acked-by: Andy Shevchenko Acked-by: Andy Shevchenko --- drivers/platform/mellanox/mlxbf-tmfifo.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index 5739a9669b29..bbc4e71a16ff 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -625,7 +625,10 @@ static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring, vdev_id = VIRTIO_ID_NET; hdr_len = sizeof(struct virtio_net_hdr); config = &fifo->vdev[vdev_id]->config.net; - if (ntohs(hdr.len) > config->mtu + + /* A legacy-only interface for now. */ + if (ntohs(hdr.len) > + __virtio16_to_cpu(virtio_legacy_is_little_endian(), + config->mtu) + MLXBF_TMFIFO_NET_L2_OVERHEAD) return; } else { @@ -1231,8 +1234,12 @@ static int mlxbf_tmfifo_probe(struct platform_device *pdev) /* Create the network vdev. */ memset(&net_config, 0, sizeof(net_config)); - net_config.mtu = ETH_DATA_LEN; - net_config.status = VIRTIO_NET_S_LINK_UP; + + /* A legacy-only interface for now. */ + net_config.mtu = __cpu_to_virtio16(virtio_legacy_is_little_endian(), + ETH_DATA_LEN); + net_config.status = __cpu_to_virtio16(virtio_legacy_is_little_endian(), + VIRTIO_NET_S_LINK_UP); mlxbf_tmfifo_get_cfg_mac(net_config.mac); rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET, MLXBF_TMFIFO_NET_FEATURES, &net_config, From 452639a64ad880792652b6d20cc5c8dd4ecf27d9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 27 Jul 2020 10:51:55 -0400 Subject: [PATCH 21/71] vdpa: make sure set_features is invoked for legacy Some legacy guests just assume features are 0 after reset. We detect that config space is accessed before features are set and set features to 0 automatically. Note: some legacy guests might not even access config space, if this is reported in the field we might need to catch a kick to handle these. Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa.c | 1 + include/linux/vdpa.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index de211ef3738c..7105265e4793 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -96,6 +96,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->dev.release = vdpa_release_dev; vdev->index = err; vdev->config = config; + vdev->features_valid = false; err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); if (err) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 239db794357c..29b8296f1414 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -33,12 +33,14 @@ struct vdpa_notification_area { * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. * @index: device index + * @features_valid: were features initialized? for legacy guests */ struct vdpa_device { struct device dev; struct device *dma_dev; const struct vdpa_config_ops *config; unsigned int index; + bool features_valid; }; /** @@ -266,4 +268,36 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) { return vdev->dma_dev; } + +static inline void vdpa_reset(struct vdpa_device *vdev) +{ + const struct vdpa_config_ops *ops = vdev->config; + + vdev->features_valid = false; + ops->set_status(vdev, 0); +} + +static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + const struct vdpa_config_ops *ops = vdev->config; + + vdev->features_valid = true; + return ops->set_features(vdev, features); +} + + +static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset, + void *buf, unsigned int len) +{ + const struct vdpa_config_ops *ops = vdev->config; + + /* + * Config accesses aren't supposed to trigger before features are set. + * If it does happen we assume a legacy guest. 
+ */ + if (!vdev->features_valid) + vdpa_set_features(vdev, 0); + ops->get_config(vdev, offset, buf, len); +} + #endif /* _LINUX_VDPA_H */ From 0d234007a5f8914ba94388b2ed93e0fd9b12e68c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 27 Jul 2020 10:58:18 -0400 Subject: [PATCH 22/71] vhost/vdpa: switch to new helpers For new helpers handling legacy features to be effective, vhost needs to invoke them. Tie them in. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 18869a35d408..3674404688f5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -118,9 +118,8 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; - ops->set_status(vdpa, 0); + vdpa_reset(vdpa); } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -196,7 +195,6 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -209,7 +207,7 @@ static long vhost_vdpa_get_config(struct vhost_vdpa *v, if (!buf) return -ENOMEM; - ops->get_config(vdpa, config.off, buf, config.len); + vdpa_get_config(vdpa, config.off, buf, config.len); if (copy_to_user(c->buf, buf, config.len)) { kvfree(buf); @@ -282,7 +280,7 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (features & ~vhost_vdpa_features[v->virtio_id]) return -EINVAL; - if (ops->set_features(vdpa, features)) + if (vdpa_set_features(vdpa, features)) return -EINVAL; return 0; From 639916734754b20fc34d956515a67b6a05776534 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Mon, 27 Jul 2020 10:59:02 -0400 Subject: [PATCH 23/71] virtio_vdpa: legacy features handling We normally expect vdpa to use the modern interface. However for consistency, let's use same APIs as vhost for legacy guests. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_vdpa.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index c30eb55030be..4a9ddb44b2a7 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -57,9 +57,8 @@ static void virtio_vdpa_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - ops->get_config(vdpa, offset, buf, len); + vdpa_get_config(vdpa, offset, buf, len); } static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset, @@ -101,9 +100,8 @@ static void virtio_vdpa_set_status(struct virtio_device *vdev, u8 status) static void virtio_vdpa_reset(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; - return ops->set_status(vdpa, 0); + vdpa_reset(vdpa); } static bool virtio_vdpa_notify(struct virtqueue *vq) @@ -294,12 +292,11 @@ static u64 virtio_vdpa_get_features(struct virtio_device *vdev) static int virtio_vdpa_finalize_features(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev); - const struct vdpa_config_ops *ops = vdpa->config; /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - return ops->set_features(vdpa, vdev->features); + return vdpa_set_features(vdpa, vdev->features); } static const char *virtio_vdpa_bus_name(struct virtio_device *vdev) From 5d7d0f387ae1b933ff9771dc921f5e2f65271df9 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Sun, 12 Jul 2020 10:57:02 -0400 Subject: [PATCH 24/71] vdpa_sim: fix endian-ness of config space VDPA sim accesses config space as native endian - this is wrong since it's a modern device and actually uses LE. It only supports modern guests so we could punt and just force LE, but let's use the full virtio APIs since people tend to copy/paste code, and this is not data path anyway. Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 33 +++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index a9bc5e0fb353..b7d5727fde4c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,23 @@ struct vdpasim { u64 features; }; +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + static struct vdpasim *vdpasim_dev; static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) @@ -306,7 +324,6 @@ static const struct vdpa_config_ops vdpasim_net_config_ops; static struct vdpasim *vdpasim_create(void) { - struct virtio_net_config *config; struct vdpasim *vdpasim; struct device *dev; int ret = -ENOMEM; @@ -331,10 +348,7 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim->buffer) goto err_iommu; - config = &vdpasim->config; - config->mtu = 1500; - config->status = VIRTIO_NET_S_LINK_UP; - eth_random_addr(config->mac); + eth_random_addr(vdpasim->config.mac); 
vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); @@ -448,6 +462,7 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa) static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct virtio_net_config *config = &vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) @@ -455,6 +470,14 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) vdpasim->features = features & vdpasim_features; + /* We generally only know whether guest is using the legacy interface + * here, so generally that's the earliest we can set config fields. + * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which + * implies VIRTIO_F_VERSION_1, but let's not try to be clever here. + */ + + config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); return 0; } From cacaf775c699e9e8473491197587535f1c10ac8f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 30 Jul 2020 16:12:40 -0400 Subject: [PATCH 25/71] virtio_config: cread/write cleanup Use vars of the correct type instead of casting. Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index c68f58f3bf34..5c3b02245ecd 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -444,53 +444,60 @@ static inline void virtio_cwrite8(struct virtio_device *vdev, static inline u16 virtio_cread16(struct virtio_device *vdev, unsigned int offset) { - u16 ret; + __virtio16 ret; might_sleep(); vdev->config->get(vdev, offset, &ret, sizeof(ret)); - return virtio16_to_cpu(vdev, (__force __virtio16)ret); + return virtio16_to_cpu(vdev, ret); } static inline void virtio_cwrite16(struct virtio_device *vdev, unsigned int offset, u16 val) { + __virtio16 v; + might_sleep(); - val = (__force u16)cpu_to_virtio16(vdev, val); - vdev->config->set(vdev, offset, &val, sizeof(val)); + v = cpu_to_virtio16(vdev, val); + vdev->config->set(vdev, offset, &v, sizeof(v)); } static inline u32 virtio_cread32(struct virtio_device *vdev, unsigned int offset) { - u32 ret; + __virtio32 ret; might_sleep(); vdev->config->get(vdev, offset, &ret, sizeof(ret)); - return virtio32_to_cpu(vdev, (__force __virtio32)ret); + return virtio32_to_cpu(vdev, ret); } static inline void virtio_cwrite32(struct virtio_device *vdev, unsigned int offset, u32 val) { + __virtio32 v; + might_sleep(); - val = (__force u32)cpu_to_virtio32(vdev, val); - vdev->config->set(vdev, offset, &val, sizeof(val)); + v = cpu_to_virtio32(vdev, val); + vdev->config->set(vdev, offset, &v, sizeof(v)); } static inline u64 virtio_cread64(struct virtio_device *vdev, unsigned int offset) { - u64 ret; + __virtio64 ret; + __virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret)); - return virtio64_to_cpu(vdev, (__force __virtio64)ret); + return virtio64_to_cpu(vdev, ret); } static inline void virtio_cwrite64(struct virtio_device *vdev, unsigned int offset, u64 val) { + __virtio64 v; + might_sleep(); - val = (__force 
u64)cpu_to_virtio64(vdev, val); - vdev->config->set(vdev, offset, &val, sizeof(val)); + v = cpu_to_virtio64(vdev, val); + vdev->config->set(vdev, offset, &v, sizeof(v)); } /* Conditional config space accessors. */ From a5b90f2db8e0ef6504695cbd36a65fd8296338ee Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 3 Aug 2020 16:08:11 -0400 Subject: [PATCH 26/71] virtio_config: rewrite using _Generic Min compiler version has been raised, so that's ok now. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 163 ++++++++++++++++------------------ 1 file changed, 77 insertions(+), 86 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 5c3b02245ecd..7fa000f02721 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -288,112 +288,103 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) return __cpu_to_virtio64(virtio_is_little_endian(vdev), val); } -/* - * Only the checker differentiates between __virtioXX and __uXX types. But we - * try to share as much code as we can with the regular GCC build. - */ -#if !defined(CONFIG_CC_IS_GCC) && !defined(__CHECKER__) +#define virtio_to_cpu(vdev, x) \ + _Generic((x), \ + __u8: (x), \ + __virtio16: virtio16_to_cpu((vdev), (x)), \ + __virtio32: virtio32_to_cpu((vdev), (x)), \ + __virtio64: virtio64_to_cpu((vdev), (x)), \ + /* + * Why define a default? checker can distinguish between + * e.g. __u16, __le16 and __virtio16, but GCC can't so + * attempts to define variants for both look like a duplicate + * variant to it. 
+ */ \ + default: _Generic((x), \ + __u8: (x), \ + __le16: virtio16_to_cpu((vdev), (__force __virtio16)(x)), \ + __le32: virtio32_to_cpu((vdev), (__force __virtio32)(x)), \ + __le64: virtio64_to_cpu((vdev), (__force __virtio64)(x)), \ + default: _Generic((x), \ + __u8: (x), \ + __u16: virtio16_to_cpu((vdev), (__force __virtio16)(x)), \ + __u32: virtio32_to_cpu((vdev), (__force __virtio32)(x)), \ + __u64: virtio64_to_cpu((vdev), (__force __virtio64)(x)) \ + ) \ + ) \ + ) -/* Not a checker - we can keep things simple */ -#define __virtio_native_typeof(x) typeof(x) - -#else - -/* - * We build this out of a couple of helper macros in a vain attempt to - * help you keep your lunch down while reading it. - */ -#define __virtio_pick_value(x, type, then, otherwise) \ - __builtin_choose_expr(__same_type(x, type), then, otherwise) - -#define __virtio_pick_type(x, type, then, otherwise) \ - __virtio_pick_value(x, type, (then)0, otherwise) - -#define __virtio_pick_endian(x, x16, x32, x64, otherwise) \ - __virtio_pick_type(x, x16, __u16, \ - __virtio_pick_type(x, x32, __u32, \ - __virtio_pick_type(x, x64, __u64, \ - otherwise))) - -#define __virtio_native_typeof(x) typeof( \ - __virtio_pick_type(x, __u8, __u8, \ - __virtio_pick_endian(x, __virtio16, __virtio32, __virtio64, \ - __virtio_pick_endian(x, __le16, __le32, __le64, \ - /* No other type allowed */ \ - (void)0)))) - -#endif +#define cpu_to_virtio(vdev, x, m) \ + _Generic((m), \ + __u8: (x), \ + __virtio16: cpu_to_virtio16((vdev), (x)), \ + __virtio32: cpu_to_virtio32((vdev), (x)), \ + __virtio64: cpu_to_virtio64((vdev), (x)), \ + /* + * Why define a default? checker can distinguish between + * e.g. __u16, __le16 and __virtio16, but GCC can't so + * attempts to define variants for both look like a duplicate + * variant to it. 
+ */ \ + default: _Generic((m), \ + __u8: (x), \ + __le16: (__force __le16)cpu_to_virtio16((vdev), (x)), \ + __le32: (__force __le32)cpu_to_virtio32((vdev), (x)), \ + __le64: (__force __le64)cpu_to_virtio64((vdev), (x)), \ + default: _Generic((m), \ + __u8: (x), \ + __u16: (__force __u16)cpu_to_virtio16((vdev), (x)), \ + __u32: (__force __u32)cpu_to_virtio32((vdev), (x)), \ + __u64: (__force __u64)cpu_to_virtio64((vdev), (x)) \ + ) \ + ) \ + ) #define __virtio_native_type(structname, member) \ - __virtio_native_typeof(((structname*)0)->member) - -#define __virtio_typecheck(structname, member, val) \ - /* Must match the member's type, and be integer */ \ - typecheck(__virtio_native_type(structname, member), (val)) - + typeof(virtio_to_cpu(NULL, ((structname*)0)->member)) /* Config space accessors. */ #define virtio_cread(vdev, structname, member, ptr) \ do { \ - might_sleep(); \ - /* Must match the member's type, and be integer */ \ - if (!__virtio_typecheck(structname, member, *(ptr))) \ - (*ptr) = 1; \ + typeof(((structname*)0)->member) virtio_cread_v; \ \ - switch (sizeof(*ptr)) { \ + might_sleep(); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_to_cpu((vdev), virtio_cread_v)), *(ptr)); \ + \ + switch (sizeof(virtio_cread_v)) { \ case 1: \ - *(ptr) = virtio_cread8(vdev, \ - offsetof(structname, member)); \ - break; \ case 2: \ - *(ptr) = virtio_cread16(vdev, \ - offsetof(structname, member)); \ - break; \ case 4: \ - *(ptr) = virtio_cread32(vdev, \ - offsetof(structname, member)); \ - break; \ - case 8: \ - *(ptr) = virtio_cread64(vdev, \ - offsetof(structname, member)); \ + vdev->config->get((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + sizeof(virtio_cread_v)); \ break; \ default: \ - BUG(); \ + __virtio_cread_many((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + 1, \ + sizeof(virtio_cread_v)); \ + break; \ } \ + *(ptr) = virtio_to_cpu(vdev, virtio_cread_v); \ } while(0) /* Config space 
accessors. */ #define virtio_cwrite(vdev, structname, member, ptr) \ do { \ - might_sleep(); \ - /* Must match the member's type, and be integer */ \ - if (!__virtio_typecheck(structname, member, *(ptr))) \ - BUG_ON((*ptr) == 1); \ + typeof(((structname*)0)->member) virtio_cwrite_v = \ + cpu_to_virtio(vdev, *(ptr), ((structname*)0)->member); \ \ - switch (sizeof(*ptr)) { \ - case 1: \ - virtio_cwrite8(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ - case 2: \ - virtio_cwrite16(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ - case 4: \ - virtio_cwrite32(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ - case 8: \ - virtio_cwrite64(vdev, \ - offsetof(structname, member), \ - *(ptr)); \ - break; \ - default: \ - BUG(); \ - } \ + might_sleep(); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_to_cpu((vdev), virtio_cwrite_v)), *(ptr)); \ + \ + vdev->config->set((vdev), offsetof(structname, member), \ + &virtio_cwrite_v, \ + sizeof(virtio_cwrite_v)); \ } while(0) /* Read @count fields, @bytes each. */ From 14191c15ab9d87e60d2ebbfbf6df83d546152af1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 10 Jul 2020 07:55:52 -0400 Subject: [PATCH 27/71] virtio_config: disallow native type fields (again) _Generic version allowed __uXX types but that is no longer necessary: Transitional devices should all use __virtioXX types (and __leXX for fields not present in the legacy devices). Modern ones should use __leXX. __uXX type would be a bug. Let's prevent that. Signed-off-by: Michael S.
Tsirkin --- include/linux/virtio_config.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7fa000f02721..441fd6dd42ab 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -304,13 +304,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) __u8: (x), \ __le16: virtio16_to_cpu((vdev), (__force __virtio16)(x)), \ __le32: virtio32_to_cpu((vdev), (__force __virtio32)(x)), \ - __le64: virtio64_to_cpu((vdev), (__force __virtio64)(x)), \ - default: _Generic((x), \ - __u8: (x), \ - __u16: virtio16_to_cpu((vdev), (__force __virtio16)(x)), \ - __u32: virtio32_to_cpu((vdev), (__force __virtio32)(x)), \ - __u64: virtio64_to_cpu((vdev), (__force __virtio64)(x)) \ - ) \ + __le64: virtio64_to_cpu((vdev), (__force __virtio64)(x)) \ ) \ ) @@ -330,13 +324,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) __u8: (x), \ __le16: (__force __le16)cpu_to_virtio16((vdev), (x)), \ __le32: (__force __le32)cpu_to_virtio32((vdev), (x)), \ - __le64: (__force __le64)cpu_to_virtio64((vdev), (x)), \ - default: _Generic((m), \ - __u8: (x), \ - __u16: (__force __u16)cpu_to_virtio16((vdev), (x)), \ - __u32: (__force __u32)cpu_to_virtio32((vdev), (x)), \ - __u64: (__force __u64)cpu_to_virtio64((vdev), (x)) \ - ) \ + __le64: (__force __le64)cpu_to_virtio64((vdev), (x)) \ ) \ ) From e598960ff5e511b76a0eb8dff25207d35c2442c8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 4 Aug 2020 17:33:08 -0400 Subject: [PATCH 28/71] virtio_config: LE config space accessors To be used by modern code, as well as to handle LE only fields such as balloon. Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 65 +++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 441fd6dd42ab..5b5196fec899 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -375,6 +375,71 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) sizeof(virtio_cwrite_v)); \ } while(0) +/* + * Nothing virtio-specific about these, but let's worry about generalizing + * these later. + */ +#define virtio_le_to_cpu(x) \ + _Generic((x), \ + __u8: (x), \ + __le16: le16_to_cpu(x), \ + __le32: le32_to_cpu(x), \ + __le64: le64_to_cpu(x) \ + ) + +#define virtio_cpu_to_le(x, m) \ + _Generic((m), \ + __u8: (x), \ + __le16: cpu_to_le16(x), \ + __le32: cpu_to_le32(x), \ + __le64: cpu_to_le64(x) \ + ) + +/* LE (e.g. modern) Config space accessors. */ +#define virtio_cread_le(vdev, structname, member, ptr) \ + do { \ + typeof(((structname*)0)->member) virtio_cread_v; \ + \ + might_sleep(); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_le_to_cpu(virtio_cread_v)), *(ptr)); \ + \ + switch (sizeof(virtio_cread_v)) { \ + case 1: \ + case 2: \ + case 4: \ + vdev->config->get((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + sizeof(virtio_cread_v)); \ + break; \ + default: \ + __virtio_cread_many((vdev), \ + offsetof(structname, member), \ + &virtio_cread_v, \ + 1, \ + sizeof(virtio_cread_v)); \ + break; \ + } \ + *(ptr) = virtio_le_to_cpu(virtio_cread_v); \ + } while(0) + +/* Config space accessors. 
*/ +#define virtio_cwrite_le(vdev, structname, member, ptr) \ + do { \ + typeof(((structname*)0)->member) virtio_cwrite_v = \ + virtio_cpu_to_le(*(ptr), ((structname*)0)->member); \ + \ + might_sleep(); \ + /* Sanity check: must match the member's type */ \ + typecheck(typeof(virtio_le_to_cpu(virtio_cwrite_v)), *(ptr)); \ + \ + vdev->config->set((vdev), offsetof(structname, member), \ + &virtio_cwrite_v, \ + sizeof(virtio_cwrite_v)); \ + } while(0) + + /* Read @count fields, @bytes each. */ static inline void __virtio_cread_many(struct virtio_device *vdev, unsigned int offset, From e3e7994d53082e48d2a6a248376683d3be3dff9d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 4 Aug 2020 18:02:39 -0400 Subject: [PATCH 29/71] virtio_caif: correct tags for config space fields Tag config space fields as having virtio endian-ness. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_caif.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/virtio_caif.h b/include/linux/virtio_caif.h index 5d2d3124ca3d..ea722479510c 100644 --- a/include/linux/virtio_caif.h +++ b/include/linux/virtio_caif.h @@ -11,9 +11,9 @@ #include struct virtio_caif_transf_config { - u16 headroom; - u16 tailroom; - u32 mtu; + __virtio16 headroom; + __virtio16 tailroom; + __virtio32 mtu; u8 reserved[4]; }; From 035ce4210be1257dd417785ff7818b5c0f2205fb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 09:17:38 -0400 Subject: [PATCH 30/71] virtio_config: add virtio_cread_le_feature Mirrors virtio_cread_feature but for LE fields. Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 5b5196fec899..cc7a2b1fd7b2 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -555,4 +555,14 @@ static inline void virtio_cwrite64(struct virtio_device *vdev, _r; \ }) +/* Conditional config space accessors. */ +#define virtio_cread_le_feature(vdev, fbit, structname, member, ptr) \ + ({ \ + int _r = 0; \ + if (!virtio_has_feature(vdev, fbit)) \ + _r = -ENOENT; \ + else \ + virtio_cread_le((vdev), structname, member, ptr); \ + _r; \ + }) #endif /* _LINUX_VIRTIO_CONFIG_H */ From 805769d7c0ddc4dd07a35f81c8b7a0a20f90dd05 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 4 Aug 2020 17:51:35 -0400 Subject: [PATCH 31/71] virtio_balloon: use LE config space accesses Balloon is LE, it's cleaner to access it as such directly. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8bc1704ffdf3..31cc97f2f515 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -398,12 +398,9 @@ static inline s64 towards_target(struct virtio_balloon *vb) s64 target; u32 num_pages; - virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, - &num_pages); - /* Legacy balloon config space is LE, unlike all other devices. */ - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) - num_pages = le32_to_cpu((__force __le32)num_pages); + virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages, + &num_pages); target = num_pages; return target - vb->num_pages; @@ -462,11 +459,8 @@ static void update_balloon_size(struct virtio_balloon *vb) u32 actual = vb->num_pages; /* Legacy balloon config space is LE, unlike all other devices. 
*/ - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) - actual = (__force u32)cpu_to_le32(actual); - - virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual, - &actual); + virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, actual, + &actual); } static void update_balloon_stats_func(struct work_struct *work) @@ -579,12 +573,10 @@ static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) { if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, &vb->config_read_bitmap)) { - virtio_cread(vb->vdev, struct virtio_balloon_config, - free_page_hint_cmd_id, - &vb->cmd_id_received_cache); /* Legacy balloon config space is LE, unlike all other devices. */ - if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1)) - vb->cmd_id_received_cache = le32_to_cpu((__force __le32)vb->cmd_id_received_cache); + virtio_cread_le(vb->vdev, struct virtio_balloon_config, + free_page_hint_cmd_id, + &vb->cmd_id_received_cache); } return vb->cmd_id_received_cache; @@ -987,8 +979,8 @@ static int virtballoon_probe(struct virtio_device *vdev) if (!want_init_on_free()) memset(&poison_val, PAGE_POISON, sizeof(poison_val)); - virtio_cwrite(vb->vdev, struct virtio_balloon_config, - poison_val, &poison_val); + virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, + poison_val, &poison_val); } vb->pr_dev_info.report = virtballoon_free_page_report; From b025584098e621d88894d28e80af686958e273af Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 32/71] virtio_input: convert to LE accessors Virtio input is modern-only. Use LE accessors for config space. Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_input.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index efaf65b0f42d..877b2ea3ed05 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -113,9 +113,9 @@ static u8 virtinput_cfg_select(struct virtio_input *vi, { u8 size; - virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select); - virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel); - virtio_cread(vi->vdev, struct virtio_input_config, size, &size); + virtio_cwrite_le(vi->vdev, struct virtio_input_config, select, &select); + virtio_cwrite_le(vi->vdev, struct virtio_input_config, subsel, &subsel); + virtio_cread_le(vi->vdev, struct virtio_input_config, size, &size); return size; } @@ -158,11 +158,11 @@ static void virtinput_cfg_abs(struct virtio_input *vi, int abs) u32 mi, ma, re, fu, fl; virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu); - virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.min, &mi); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.max, &ma); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.res, &re); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu); + virtio_cread_le(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); input_set_abs_params(vi->idev, abs, mi, ma, fu, fl); input_abs_set_res(vi->idev, abs, re); } @@ -244,14 +244,14 @@ static int virtinput_probe(struct virtio_device *vdev) size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0); if (size >= sizeof(struct 
virtio_input_devids)) { - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.bustype, &vi->idev->id.bustype); - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.vendor, &vi->idev->id.vendor); - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.product, &vi->idev->id.product); - virtio_cread(vi->vdev, struct virtio_input_config, - u.ids.version, &vi->idev->id.version); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.bustype, &vi->idev->id.bustype); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.vendor, &vi->idev->id.vendor); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.product, &vi->idev->id.product); + virtio_cread_le(vi->vdev, struct virtio_input_config, + u.ids.version, &vi->idev->id.version); } else { vi->idev->id.bustype = BUS_VIRTUAL; } From 2c0349ec1a8ee6f20eb164a3a691bf661043fd24 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 33/71] virtio_fs: convert to LE accessors Virtio fs is modern-only. Use LE accessors for config space. Signed-off-by: Michael S. Tsirkin --- fs/fuse/virtio_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 4c4ef5d69298..104f35de5270 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -606,8 +606,8 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, unsigned int i; int ret = 0; - virtio_cread(vdev, struct virtio_fs_config, num_request_queues, - &fs->num_request_queues); + virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, + &fs->num_request_queues); if (fs->num_request_queues == 0) return -EINVAL; From b13a54070cea06d122b3bdc56e5ce024fff4d2d2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 34/71] virtio_crypto: convert to LE accessors Virtio crypto is modern-only. Use LE accessors for config space. Signed-off-by: Michael S. 
Tsirkin --- drivers/crypto/virtio/virtio_crypto_core.c | 46 +++++++++++----------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index c8a962c62663..aeecce27fe8f 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -204,8 +204,8 @@ static int virtcrypto_update_status(struct virtio_crypto *vcrypto) u32 status; int err; - virtio_cread(vcrypto->vdev, - struct virtio_crypto_config, status, &status); + virtio_cread_le(vcrypto->vdev, + struct virtio_crypto_config, status, &status); /* * Unknown status bits would be a host error and the driver @@ -323,31 +323,31 @@ static int virtcrypto_probe(struct virtio_device *vdev) if (!vcrypto) return -ENOMEM; - virtio_cread(vdev, struct virtio_crypto_config, + virtio_cread_le(vdev, struct virtio_crypto_config, max_dataqueues, &max_data_queues); if (max_data_queues < 1) max_data_queues = 1; - virtio_cread(vdev, struct virtio_crypto_config, - max_cipher_key_len, &max_cipher_key_len); - virtio_cread(vdev, struct virtio_crypto_config, - max_auth_key_len, &max_auth_key_len); - virtio_cread(vdev, struct virtio_crypto_config, - max_size, &max_size); - virtio_cread(vdev, struct virtio_crypto_config, - crypto_services, &crypto_services); - virtio_cread(vdev, struct virtio_crypto_config, - cipher_algo_l, &cipher_algo_l); - virtio_cread(vdev, struct virtio_crypto_config, - cipher_algo_h, &cipher_algo_h); - virtio_cread(vdev, struct virtio_crypto_config, - hash_algo, &hash_algo); - virtio_cread(vdev, struct virtio_crypto_config, - mac_algo_l, &mac_algo_l); - virtio_cread(vdev, struct virtio_crypto_config, - mac_algo_h, &mac_algo_h); - virtio_cread(vdev, struct virtio_crypto_config, - aead_algo, &aead_algo); + virtio_cread_le(vdev, struct virtio_crypto_config, + max_cipher_key_len, &max_cipher_key_len); + virtio_cread_le(vdev, struct virtio_crypto_config, + max_auth_key_len, 
&max_auth_key_len); + virtio_cread_le(vdev, struct virtio_crypto_config, + max_size, &max_size); + virtio_cread_le(vdev, struct virtio_crypto_config, + crypto_services, &crypto_services); + virtio_cread_le(vdev, struct virtio_crypto_config, + cipher_algo_l, &cipher_algo_l); + virtio_cread_le(vdev, struct virtio_crypto_config, + cipher_algo_h, &cipher_algo_h); + virtio_cread_le(vdev, struct virtio_crypto_config, + hash_algo, &hash_algo); + virtio_cread_le(vdev, struct virtio_crypto_config, + mac_algo_l, &mac_algo_l); + virtio_cread_le(vdev, struct virtio_crypto_config, + mac_algo_h, &mac_algo_h); + virtio_cread_le(vdev, struct virtio_crypto_config, + aead_algo, &aead_algo); /* Add virtio crypto device to global table */ err = virtcrypto_devmgr_add_dev(vcrypto); From 02e715b7fadb5674e94b71de6424a07f6d4d493f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 35/71] virtio_pmem: convert to LE accessors Virtio pmem is modern-only. Use LE accessors for config space. Signed-off-by: Michael S. Tsirkin --- drivers/nvdimm/virtio_pmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c index 5e3d07b47e0c..726c7354d465 100644 --- a/drivers/nvdimm/virtio_pmem.c +++ b/drivers/nvdimm/virtio_pmem.c @@ -58,9 +58,9 @@ static int virtio_pmem_probe(struct virtio_device *vdev) goto out_err; } - virtio_cread(vpmem->vdev, struct virtio_pmem_config, + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, start, &vpmem->start); - virtio_cread(vpmem->vdev, struct virtio_pmem_config, + virtio_cread_le(vpmem->vdev, struct virtio_pmem_config, size, &vpmem->size); res.start = vpmem->start; From 115a71d8045d8571fb05df45088837621510ba57 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 36/71] drm/virtio: convert to LE accessors Virtgpu is modern-only. Use LE accessors for config space. 
Signed-off-by: Michael S. Tsirkin --- drivers/gpu/drm/virtio/virtgpu_kms.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 0a5c8cf409fb..4d944a0dff3e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -39,8 +39,8 @@ static void virtio_gpu_config_changed_work_func(struct work_struct *work) u32 events_read, events_clear = 0; /* read the config space */ - virtio_cread(vgdev->vdev, struct virtio_gpu_config, - events_read, &events_read); + virtio_cread_le(vgdev->vdev, struct virtio_gpu_config, + events_read, &events_read); if (events_read & VIRTIO_GPU_EVENT_DISPLAY) { if (vgdev->has_edid) virtio_gpu_cmd_get_edids(vgdev); @@ -49,8 +49,8 @@ static void virtio_gpu_config_changed_work_func(struct work_struct *work) drm_helper_hpd_irq_event(vgdev->ddev); events_clear |= VIRTIO_GPU_EVENT_DISPLAY; } - virtio_cwrite(vgdev->vdev, struct virtio_gpu_config, - events_clear, &events_clear); + virtio_cwrite_le(vgdev->vdev, struct virtio_gpu_config, + events_clear, &events_clear); } static void virtio_gpu_init_vq(struct virtio_gpu_queue *vgvq, @@ -165,8 +165,8 @@ int virtio_gpu_init(struct drm_device *dev) } /* get display info */ - virtio_cread(vgdev->vdev, struct virtio_gpu_config, - num_scanouts, &num_scanouts); + virtio_cread_le(vgdev->vdev, struct virtio_gpu_config, + num_scanouts, &num_scanouts); vgdev->num_scanouts = min_t(uint32_t, num_scanouts, VIRTIO_GPU_MAX_SCANOUTS); if (!vgdev->num_scanouts) { @@ -176,8 +176,8 @@ int virtio_gpu_init(struct drm_device *dev) } DRM_INFO("number of scanouts: %d\n", num_scanouts); - virtio_cread(vgdev->vdev, struct virtio_gpu_config, - num_capsets, &num_capsets); + virtio_cread_le(vgdev->vdev, struct virtio_gpu_config, + num_capsets, &num_capsets); DRM_INFO("number of cap sets: %d\n", num_capsets); virtio_gpu_modeset_init(vgdev); From 99e0d0488ba6a03c493fef5492696cdee07457ec Mon Sep 
17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 37/71] virtio_mem: convert to LE accessors Virtio mem is modern-only. Use LE accessors for config space. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f26f5f64ae82..c08512fcea90 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1530,21 +1530,21 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) uint64_t new_plugged_size, usable_region_size, end_addr; /* the plugged_size is just a reflection of what _we_ did previously */ - virtio_cread(vm->vdev, struct virtio_mem_config, plugged_size, - &new_plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &new_plugged_size); if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size)) vm->plugged_size = new_plugged_size; /* calculate the last usable memory block id */ - virtio_cread(vm->vdev, struct virtio_mem_config, - usable_region_size, &usable_region_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, + usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; /* see if there is a request to change the size */ - virtio_cread(vm->vdev, struct virtio_mem_config, requested_size, - &vm->requested_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, + &vm->requested_size); dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size); dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size); @@ -1677,16 +1677,16 @@ static int virtio_mem_init(struct virtio_mem *vm) } /* Fetch all properties that can't change. 
*/ - virtio_cread(vm->vdev, struct virtio_mem_config, plugged_size, - &vm->plugged_size); - virtio_cread(vm->vdev, struct virtio_mem_config, block_size, - &vm->device_block_size); - virtio_cread(vm->vdev, struct virtio_mem_config, node_id, - &node_id); + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &vm->plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, + &vm->device_block_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, + &node_id); vm->nid = virtio_mem_translate_node_id(vm, node_id); - virtio_cread(vm->vdev, struct virtio_mem_config, addr, &vm->addr); - virtio_cread(vm->vdev, struct virtio_mem_config, region_size, - &vm->region_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); + virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, + &vm->region_size); /* * We always hotplug memory in memory block granularity. This way, From d83c67c4a669a4b396239c338bef802cae72e648 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 38/71] virtio-iommu: convert to LE accessors Virtio iommu is modern-only. Use LE accessors for config space. Signed-off-by: Michael S. 
Tsirkin --- drivers/iommu/virtio-iommu.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index f6f07489a9aa..b4da396cce60 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -1010,8 +1010,8 @@ static int viommu_probe(struct virtio_device *vdev) if (ret) return ret; - virtio_cread(vdev, struct virtio_iommu_config, page_size_mask, - &viommu->pgsize_bitmap); + virtio_cread_le(vdev, struct virtio_iommu_config, page_size_mask, + &viommu->pgsize_bitmap); if (!viommu->pgsize_bitmap) { ret = -EINVAL; @@ -1022,25 +1022,25 @@ static int viommu_probe(struct virtio_device *vdev) viommu->last_domain = ~0U; /* Optional features */ - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, - struct virtio_iommu_config, input_range.start, - &input_start); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, + struct virtio_iommu_config, input_range.start, + &input_start); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, - struct virtio_iommu_config, input_range.end, - &input_end); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE, + struct virtio_iommu_config, input_range.end, + &input_end); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, - struct virtio_iommu_config, domain_range.start, - &viommu->first_domain); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.start, + &viommu->first_domain); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, - struct virtio_iommu_config, domain_range.end, - &viommu->last_domain); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE, + struct virtio_iommu_config, domain_range.end, + &viommu->last_domain); - virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE, - struct virtio_iommu_config, probe_size, - &viommu->probe_size); + virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_PROBE, + struct virtio_iommu_config, 
probe_size, + &viommu->probe_size); viommu->geometry = (struct iommu_domain_geometry) { .aperture_start = input_start, From 83eb9db95eb453f1db651909ad4598c3d44ef1e1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 07:29:12 -0400 Subject: [PATCH 39/71] virtio_config: drop LE option from config space All drivers now use virtio_cread/write_le for LE config space fields. Drop LE option from virtio_cread/write, only leaving the option to access transitional fields. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index cc7a2b1fd7b2..ecb166c824bb 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -293,19 +293,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) __u8: (x), \ __virtio16: virtio16_to_cpu((vdev), (x)), \ __virtio32: virtio32_to_cpu((vdev), (x)), \ - __virtio64: virtio64_to_cpu((vdev), (x)), \ - /* - * Why define a default? checker can distinguish between - * e.g. __u16, __le16 and __virtio16, but GCC can't so - * attempts to define variants for both look like a duplicate - * variant to it. - */ \ - default: _Generic((x), \ - __u8: (x), \ - __le16: virtio16_to_cpu((vdev), (__force __virtio16)(x)), \ - __le32: virtio32_to_cpu((vdev), (__force __virtio32)(x)), \ - __le64: virtio64_to_cpu((vdev), (__force __virtio64)(x)) \ - ) \ + __virtio64: virtio64_to_cpu((vdev), (x)) \ ) #define cpu_to_virtio(vdev, x, m) \ @@ -313,19 +301,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) __u8: (x), \ __virtio16: cpu_to_virtio16((vdev), (x)), \ __virtio32: cpu_to_virtio32((vdev), (x)), \ - __virtio64: cpu_to_virtio64((vdev), (x)), \ - /* - * Why define a default? checker can distinguish between - * e.g. 
__u16, __le16 and __virtio16, but GCC can't so - * attempts to define variants for both look like a duplicate - * variant to it. - */ \ - default: _Generic((m), \ - __u8: (x), \ - __le16: (__force __le16)cpu_to_virtio16((vdev), (x)), \ - __le32: (__force __le32)cpu_to_virtio32((vdev), (x)), \ - __le64: (__force __le64)cpu_to_virtio64((vdev), (x)) \ - ) \ + __virtio64: cpu_to_virtio64((vdev), (x)) \ ) #define __virtio_native_type(structname, member) \ From 64ffa39dc860fb9772225c694353f73eca5801c6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 05:39:36 -0400 Subject: [PATCH 40/71] virtio_net: use LE accessors for speed/duplex Speed and duplex config fields depend on VIRTIO_NET_F_SPEED_DUPLEX which being 63>31 depends on VIRTIO_F_VERSION_1. Accordingly, use LE accessors for these fields. Reported-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 9 +++++---- include/uapi/linux/virtio_net.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba38765dc490..0934b1ec5320 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2264,12 +2264,13 @@ static void virtnet_update_settings(struct virtnet_info *vi) if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_SPEED_DUPLEX)) return; - speed = virtio_cread32(vi->vdev, offsetof(struct virtio_net_config, - speed)); + virtio_cread_le(vi->vdev, struct virtio_net_config, speed, &speed); + if (ethtool_validate_speed(speed)) vi->speed = speed; - duplex = virtio_cread8(vi->vdev, offsetof(struct virtio_net_config, - duplex)); + + virtio_cread_le(vi->vdev, struct virtio_net_config, duplex, &duplex); + if (ethtool_validate_duplex(duplex)) vi->duplex = duplex; } diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 27d996f29dd1..3f55a4215f11 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -99,7 +99,7 @@ struct virtio_net_config 
{ * speed, in units of 1Mb. All values 0 to INT_MAX are legal. * Any other value stands for unknown. */ - __virtio32 speed; + __le32 speed; /* * 0x00 - half duplex * 0x01 - full duplex From 481a0d7422db26fb63e2d64f0652667a5c6d0f3e Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Sun, 2 Aug 2020 15:44:09 +0800 Subject: [PATCH 41/71] virtio_ring: Avoid loop when vq is broken in virtqueue_poll An endless loop may occur if vq->broken is true: virtqueue_get_buf_ctx_packed or virtqueue_get_buf_ctx_split will return NULL, so virtnet_poll will reschedule napi to receive packets, which drives cpu usage (si) to 100%. The call trace is as below: virtnet_poll virtnet_receive virtqueue_get_buf_ctx virtqueue_get_buf_ctx_packed virtqueue_get_buf_ctx_split virtqueue_napi_complete virtqueue_poll //return true virtqueue_napi_schedule //it will reschedule napi To fix this, return false if vq is broken in virtqueue_poll. Signed-off-by: Mao Wenan Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/1596354249-96204-1-git-send-email-wenan.mao@linux.alibaba.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 34253cb69cb8..7d4bc9eb7fc5 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1960,6 +1960,9 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); + if (unlikely(vq->broken)) + return false; + virtio_mb(vq->weak_barriers); return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : virtqueue_poll_split(_vq, last_used_idx); From 6234f80574d7569444d8718355fa2838e92b158b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 20 Jul 2020 16:50:43 +0800 Subject: [PATCH 42/71] vhost: vdpa: remove per device feature whitelist We used to have a per device feature whitelist to filter out the unsupported virtio features.
But this seems unnecessary since: - the main idea behind feature whitelist is to block control vq feature until we finalize the control virtqueue API. But the current vhost-vDPA uAPI is sufficient to support control virtqueue. For a device that has a hardware control virtqueue, the vDPA device driver can just set up the hardware virtqueue and let userspace use the hardware virtqueue directly. For a device that doesn't have a control virtqueue, the vDPA device driver needs to use e.g. vringh to emulate a software control virtqueue. - we don't do it in virtio-vDPA driver So remove this limitation. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200720085043.16485-1-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 3674404688f5..210f213a028b 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -26,35 +26,6 @@ #include "vhost.h" -enum { - VHOST_VDPA_FEATURES = - (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_ACCESS_PLATFORM) | - (1ULL << VIRTIO_F_RING_PACKED) | - (1ULL << VIRTIO_F_ORDER_PLATFORM) | - (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | - (1ULL << VIRTIO_RING_F_EVENT_IDX), - - VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES | - (1ULL << VIRTIO_NET_F_CSUM) | - (1ULL << VIRTIO_NET_F_GUEST_CSUM) | - (1ULL << VIRTIO_NET_F_MTU) | - (1ULL << VIRTIO_NET_F_MAC) | - (1ULL << VIRTIO_NET_F_GUEST_TSO4) | - (1ULL << VIRTIO_NET_F_GUEST_TSO6) | - (1ULL << VIRTIO_NET_F_GUEST_ECN) | - (1ULL << VIRTIO_NET_F_GUEST_UFO) | - (1ULL << VIRTIO_NET_F_HOST_TSO4) | - (1ULL << VIRTIO_NET_F_HOST_TSO6) | - (1ULL << VIRTIO_NET_F_HOST_ECN) | - (1ULL << VIRTIO_NET_F_HOST_UFO) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_NET_F_STATUS) | - (1ULL << VIRTIO_NET_F_SPEED_DUPLEX), -}; - /* Currently, only network backend w/o multiqueue is
supported. */ #define VHOST_VDPA_VQ_MAX 2 @@ -79,10 +50,6 @@ static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; -static const u64 vhost_vdpa_features[] = { - [VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES, -}; - static void handle_vq_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, @@ -253,7 +220,6 @@ static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) u64 features; features = ops->get_features(vdpa); - features &= vhost_vdpa_features[v->virtio_id]; if (copy_to_user(featurep, &features, sizeof(features))) return -EFAULT; @@ -277,9 +243,6 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~vhost_vdpa_features[v->virtio_id]) - return -EINVAL; - if (vdpa_set_features(vdpa, features)) return -EINVAL; From 0ea9ee430e74b16c6b17e70757d1c26d8d140e1f Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Fri, 31 Jul 2020 15:38:22 +0800 Subject: [PATCH 43/71] vdpasim: protect concurrent access to iommu iotlb Iommu iotlb can be accessed by different cores for performing IO using multiple virt queues. Add a spinlock to synchronize iotlb accesses. This could be easily reproduced when using more than 1 pktgen threads to inject traffic to vdpa simulator. Fixes: 2c53d0f64c06f("vdpasim: vDPA device simulator") Cc: stable@vger.kernel.org Signed-off-by: Max Gurtovoy Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200731073822.13326-1-jasowang@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index b7d5727fde4c..2a55b463e64c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -71,6 +71,8 @@ struct vdpasim { u32 status; u32 generation; u64 features; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; }; /* TODO: cross-endian support */ @@ -136,7 +138,9 @@ static void vdpasim_reset(struct vdpasim *vdpasim) for (i = 0; i < VDPASIM_VQ_NUM; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); + spin_unlock(&vdpasim->iommu_lock); vdpasim->features = 0; vdpasim->status = 0; @@ -254,8 +258,10 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, /* For simplicity, use identical mapping to avoid e.g iova * allocator. */ + spin_lock(&vdpasim->iommu_lock); ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, pa, dir_to_perm(dir)); + spin_unlock(&vdpasim->iommu_lock); if (ret) return DMA_MAPPING_ERROR; @@ -269,8 +275,10 @@ static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(iommu, (u64)dma_addr, (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); } static void *vdpasim_alloc_coherent(struct device *dev, size_t size, @@ -282,9 +290,10 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, void *addr = kmalloc(size, flag); int ret; - if (!addr) + spin_lock(&vdpasim->iommu_lock); + if (!addr) { *dma_addr = DMA_MAPPING_ERROR; - else { + } else { u64 pa = virt_to_phys(addr); ret = vhost_iotlb_add_range(iommu, (u64)pa, @@ -297,6 +306,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, } else *dma_addr = (dma_addr_t)pa; 
} + spin_unlock(&vdpasim->iommu_lock); return addr; } @@ -308,8 +318,11 @@ static void vdpasim_free_coherent(struct device *dev, size_t size, struct vdpasim *vdpasim = dev_to_sim(dev); struct vhost_iotlb *iommu = vdpasim->iommu; + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(iommu, (u64)dma_addr, (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); + kfree(phys_to_virt((uintptr_t)dma_addr)); } @@ -555,6 +568,7 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, u64 start = 0ULL, last = 0ULL - 1; int ret; + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); for (map = vhost_iotlb_itree_first(iotlb, start, last); map; @@ -564,10 +578,12 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, if (ret) goto err; } + spin_unlock(&vdpasim->iommu_lock); return 0; err: vhost_iotlb_reset(vdpasim->iommu); + spin_unlock(&vdpasim->iommu_lock); return ret; } @@ -575,16 +591,23 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, u64 pa, u32 perm) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int ret; - return vhost_iotlb_add_range(vdpasim->iommu, iova, - iova + size - 1, pa, perm); + spin_lock(&vdpasim->iommu_lock); + ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, + perm); + spin_unlock(&vdpasim->iommu_lock); + + return ret; } static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + spin_lock(&vdpasim->iommu_lock); vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); + spin_unlock(&vdpasim->iommu_lock); return 0; } From bf11d71a0a919c32158dd89891d95f6f91a323b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 31 Jul 2020 08:09:56 -0500 Subject: [PATCH 44/71] vhost: Use flex_array_size() helper in copy_from_user() Make use of the flex_array_size() helper to calculate the size of a flexible array member within an enclosing structure. 
This helper offers defense-in-depth against potential integer overflows, while at the same time making it explicitly clear that we are dealing with a flexible array member. Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20200731130956.GA30525@embeddedor Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d7b8df3edffc..39183ed738a1 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1405,7 +1405,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) memcpy(newmem, &mem, size); if (copy_from_user(newmem->regions, m->regions, - mem.nregions * sizeof *m->regions)) { + flex_array_size(newmem, regions, mem.nregions))) { kvfree(newmem); return -EFAULT; } From 265a0ad8731dc04fccb76cd1abf5a85a9359e62e Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 31 Jul 2020 14:55:28 +0800 Subject: [PATCH 45/71] vhost: introduce vhost_vring_call This commit introduces struct vhost_vring_call, which replaces the raw struct eventfd_ctx *call_ctx in struct vhost_virtqueue. Besides eventfd_ctx, it contains a spin lock and an irq_bypass_producer in its structure. Signed-off-by: Zhu Lingshan Suggested-by: Jason Wang Link: https://lore.kernel.org/r/20200731065533.4144-2-lingshan.zhu@intel.com Signed-off-by: Michael S.
Tsirkin --- drivers/vhost/vdpa.c | 4 ++-- drivers/vhost/vhost.c | 22 ++++++++++++++++------ drivers/vhost/vhost.h | 9 ++++++++- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 210f213a028b..1789e5ffb18f 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -63,7 +63,7 @@ static void handle_vq_kick(struct vhost_work *work) static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) { struct vhost_virtqueue *vq = private; - struct eventfd_ctx *call_ctx = vq->call_ctx; + struct eventfd_ctx *call_ctx = vq->call_ctx.ctx; if (call_ctx) eventfd_signal(call_ctx, 1); @@ -343,7 +343,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_CALL: - if (vq->call_ctx) { + if (vq->call_ctx.ctx) { cb.callback = vhost_vdpa_virtqueue_cb; cb.private = vq; } else { diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 39183ed738a1..8f622064b3e8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -298,6 +298,13 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } +static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) +{ + call_ctx->ctx = NULL; + memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); + spin_lock_init(&call_ctx->ctx_lock); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -319,13 +326,13 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->log_base = NULL; vq->error_ctx = NULL; vq->kick = NULL; - vq->call_ctx = NULL; vq->log_ctx = NULL; vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + vhost_vring_call_reset(&vq->call_ctx); __vhost_vq_meta_reset(vq); } @@ -685,8 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) eventfd_ctx_put(dev->vqs[i]->error_ctx); if (dev->vqs[i]->kick) fput(dev->vqs[i]->kick); - if (dev->vqs[i]->call_ctx) - 
eventfd_ctx_put(dev->vqs[i]->call_ctx); + if (dev->vqs[i]->call_ctx.ctx) + eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); @@ -1629,7 +1636,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = PTR_ERR(ctx); break; } - swap(ctx, vq->call_ctx); + + spin_lock(&vq->call_ctx.ctx_lock); + swap(ctx, vq->call_ctx.ctx); + spin_unlock(&vq->call_ctx.ctx_lock); break; case VHOST_SET_VRING_ERR: if (copy_from_user(&f, argp, sizeof f)) { @@ -2440,8 +2450,8 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx && vhost_notify(dev, vq)) - eventfd_signal(vq->call_ctx, 1); + if (vq->call_ctx.ctx && vhost_notify(dev, vq)) + eventfd_signal(vq->call_ctx.ctx, 1); } EXPORT_SYMBOL_GPL(vhost_signal); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index c8e96a095d3b..38eb1aa3b68d 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -13,6 +13,7 @@ #include #include #include +#include struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -60,6 +61,12 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; +struct vhost_vring_call { + struct eventfd_ctx *ctx; + struct irq_bypass_producer producer; + spinlock_t ctx_lock; +}; + /* The virtqueue structure describes a queue attached to a device. 
*/ struct vhost_virtqueue { struct vhost_dev *dev; @@ -72,7 +79,7 @@ struct vhost_virtqueue { vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; - struct eventfd_ctx *call_ctx; + struct vhost_vring_call call_ctx; struct eventfd_ctx *error_ctx; struct eventfd_ctx *log_ctx; From 2edd9cb79fb31b0907c6e0cdce2824780cf9b153 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 31 Jul 2020 14:55:29 +0800 Subject: [PATCH 46/71] kvm: detect assigned device via irqbypass manager vDPA devices have dedicated backing hardware, like passed-through devices, so it is possible to set up irq offloading to vCPUs for vDPA devices. This patch therefore manipulates the assigned device counters via kvm_arch_start/end_assignment() in the irqbypass manager, so that assigned devices can be detected in update_pi_irte(). We will increase/decrease the assigned device counter in kvm/x86. Both vDPA and VFIO go through this code path. Only x86 uses these counters and kvm_arch_start/end_assignment(), so this code path only affects x86 for now. Signed-off-by: Zhu Lingshan Suggested-by: Jason Wang Link: https://lore.kernel.org/r/20200731065533.4144-3-lingshan.zhu@intel.com Signed-off-by: Michael S.
Tsirkin --- arch/x86/kvm/x86.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 88c593f83b28..76a2e7fd18c7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10630,11 +10630,17 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + int ret; irqfd->producer = prod; + kvm_arch_start_assignment(irqfd->kvm); + ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, + prod->irq, irqfd->gsi, 1); - return kvm_x86_ops.update_pi_irte(irqfd->kvm, - prod->irq, irqfd->gsi, 1); + if (ret) + kvm_arch_end_assignment(irqfd->kvm); + + return ret; } void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, @@ -10657,6 +10663,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + + kvm_arch_end_assignment(irqfd->kvm); } int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, From 7164675ab5caf46867d6a5448f4ff3af92d80a30 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 31 Jul 2020 14:55:30 +0800 Subject: [PATCH 47/71] vDPA: add get_vq_irq() in vdpa_config_ops This commit adds a new function get_vq_irq() in struct vdpa_config_ops, which will return the irq number of a virtqueue. Signed-off-by: Zhu Lingshan Suggested-by: Jason Wang Link: https://lore.kernel.org/r/20200731065533.4144-4-lingshan.zhu@intel.com Signed-off-by: Michael S. 
Tsirkin --- include/linux/vdpa.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 29b8296f1414..5c530a64aa06 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -89,6 +89,12 @@ struct vdpa_device { * @vdev: vdpa device * @idx: virtqueue index * Returns the notifcation area + * @get_vq_irq: Get the irq number of a virtqueue (optional, + * but must implemented if require vq irq offloading) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns int: irq number of a virtqueue, + * negative number if no irq assigned. * @get_vq_align: Get the virtqueue align requirement * for the device * @vdev: vdpa device @@ -180,6 +186,7 @@ struct vdpa_config_ops { u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); + int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); From 2cf1ba9a4d15cb78b96ea97f727b93382c3f9a60 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 31 Jul 2020 14:55:31 +0800 Subject: [PATCH 48/71] vhost_vdpa: implement IRQ offloading in vhost_vdpa This patch introduces a set of functions to set up, tear down and update irq offloading by registering, unregistering and re-registering the irq_bypass_producer, respectively. With these functions, irq offloading is set up when DRIVER_OK is set, torn down when DRIVER_OK is cleared, and updated through SET_VRING_CALL. Signed-off-by: Zhu Lingshan Suggested-by: Jason Wang Link: https://lore.kernel.org/r/20200731065533.4144-5-lingshan.zhu@intel.com Signed-off-by: Michael S.
Tsirkin --- drivers/vhost/Kconfig | 1 + drivers/vhost/vdpa.c | 63 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index d3688c6afb87..587fbae06182 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -65,6 +65,7 @@ config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" depends on EVENTFD select VHOST + select IRQ_BYPASS_MANAGER depends on VDPA help This kernel module can be loaded in host kernel to accelerate diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 1789e5ffb18f..7441b9803eae 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -82,6 +82,39 @@ static irqreturn_t vhost_vdpa_config_cb(void *private) return IRQ_HANDLED; } +static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) +{ + struct vhost_virtqueue *vq = &v->vqs[qid]; + const struct vdpa_config_ops *ops = v->vdpa->config; + struct vdpa_device *vdpa = v->vdpa; + int ret, irq; + + if (!ops->get_vq_irq) + return; + + irq = ops->get_vq_irq(vdpa, qid); + spin_lock(&vq->call_ctx.ctx_lock); + irq_bypass_unregister_producer(&vq->call_ctx.producer); + if (!vq->call_ctx.ctx || irq < 0) { + spin_unlock(&vq->call_ctx.ctx_lock); + return; + } + + vq->call_ctx.producer.token = vq->call_ctx.ctx; + vq->call_ctx.producer.irq = irq; + ret = irq_bypass_register_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + +static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) +{ + struct vhost_virtqueue *vq = &v->vqs[qid]; + + spin_lock(&vq->call_ctx.ctx_lock); + irq_bypass_unregister_producer(&vq->call_ctx.producer); + spin_unlock(&vq->call_ctx.ctx_lock); +} + static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; @@ -121,11 +154,15 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; 
- u8 status; + u8 status, status_old; + int nvqs = v->nvqs; + u16 i; if (copy_from_user(&status, statusp, sizeof(status))) return -EFAULT; + status_old = ops->get_status(vdpa); + /* * Userspace shouldn't remove status bits unless reset the * status to 0. @@ -135,6 +172,15 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) ops->set_status(vdpa, status); + /* vq irq is not expected to be changed once DRIVER_OK is set */ + if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) + for (i = 0; i < nvqs; i++) + vhost_vdpa_setup_vq_irq(v, i); + + if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK)) + for (i = 0; i < nvqs; i++) + vhost_vdpa_unsetup_vq_irq(v, i); + return 0; } @@ -293,6 +339,7 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) return 0; } + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -351,6 +398,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, cb.private = NULL; } ops->set_vq_cb(vdpa, idx, &cb); + vhost_vdpa_setup_vq_irq(v, idx); break; case VHOST_SET_VRING_NUM: @@ -726,6 +774,18 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return r; } +static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) +{ + struct vhost_virtqueue *vq; + int i; + + for (i = 0; i < v->nvqs; i++) { + vq = &v->vqs[i]; + if (vq->call_ctx.producer.irq) + irq_bypass_unregister_producer(&vq->call_ctx.producer); + } +} + static int vhost_vdpa_release(struct inode *inode, struct file *filep) { struct vhost_vdpa *v = filep->private_data; @@ -738,6 +798,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); + vhost_vdpa_clean_irq(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); mutex_unlock(&d->mutex); From 3597a2fba672cfd4112bbdbe6220127f280c8dba Mon Sep 17 
00:00:00 2001 From: Zhu Lingshan Date: Fri, 31 Jul 2020 14:55:32 +0800 Subject: [PATCH 49/71] ifcvf: implement vdpa_config_ops.get_vq_irq() This commit implemented vdpa_config_ops.get_vq_irq() in ifcvf, and initialized vq irq to -EINVAL. So that ifcvf can report irq number of a vq, or -EINVAL if the vq is not assigned an irq number. Signed-off-by: Zhu Lingshan Suggested-by: Jason Wang Link: https://lore.kernel.org/r/20200731065533.4144-6-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index f5a60c14b979..a902b29b0d29 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -50,8 +50,10 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) int i; - for (i = 0; i < queues; i++) + for (i = 0; i < queues; i++) { devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); + vf->vring[i].irq = -EINVAL; + } ifcvf_free_irq_vectors(pdev); } @@ -352,6 +354,14 @@ static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, vf->config_cb.private = cb->private; } +static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, + u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->vring[qid].irq; +} + /* * IFCVF currently does't have on-chip IOMMU, so not * implemented set_map()/dma_map()/dma_unmap() @@ -369,6 +379,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .get_vq_ready = ifcvf_vdpa_get_vq_ready, .set_vq_num = ifcvf_vdpa_set_vq_num, .set_vq_address = ifcvf_vdpa_set_vq_address, + .get_vq_irq = ifcvf_vdpa_get_vq_irq, .kick_vq = ifcvf_vdpa_kick_vq, .get_generation = ifcvf_vdpa_get_generation, .get_device_id = ifcvf_vdpa_get_device_id, @@ -384,7 +395,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *dev = &pdev->dev; struct ifcvf_adapter *adapter; struct 
ifcvf_hw *vf; - int ret; + int ret, i; ret = pcim_enable_device(pdev); if (ret) { @@ -441,6 +452,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + vf->vring[i].irq = -EINVAL; + ret = vdpa_register_device(&adapter->vdpa); if (ret) { IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); From a979a6aa009f3c99689432e0cdb5402a4463fb88 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Fri, 31 Jul 2020 14:55:33 +0800 Subject: [PATCH 50/71] irqbypass: do not start cons/prod when failed connect If failed to connect, there is no need to start consumer nor producer. Signed-off-by: Zhu Lingshan Suggested-by: Jason Wang Link: https://lore.kernel.org/r/20200731065533.4144-7-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- virt/lib/irqbypass.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c index 28fda42e471b..c9bb3957f58a 100644 --- a/virt/lib/irqbypass.c +++ b/virt/lib/irqbypass.c @@ -40,17 +40,21 @@ static int __connect(struct irq_bypass_producer *prod, if (prod->add_consumer) ret = prod->add_consumer(prod, cons); - if (!ret) { - ret = cons->add_producer(cons, prod); - if (ret && prod->del_consumer) - prod->del_consumer(prod, cons); - } + if (ret) + goto err_add_consumer; + + ret = cons->add_producer(cons, prod); + if (ret) + goto err_add_producer; if (cons->start) cons->start(cons); if (prod->start) prod->start(prod); - +err_add_producer: + if (prod->del_consumer) + prod->del_consumer(prod, cons); +err_add_consumer: return ret; } From 46af9adefd2fd44afa9e8a5020c520812151f849 Mon Sep 17 00:00:00 2001 From: Liao Pingfang Date: Mon, 3 Aug 2020 19:52:24 +0800 Subject: [PATCH 51/71] virtio_pci_modern: Fix the comment of virtio_pci_find_capability() Fix the comment of virtio_pci_find_capability() by adding missing comment for the last parameter: bars. 
Fixes: 59a5b0f7bf74 ("virtio-pci: alloc only resources actually used.") Signed-off-by: Liao Pingfang Signed-off-by: Yi Wang Link: https://lore.kernel.org/r/1596455545-43556-1-git-send-email-wang.yi59@zte.com.cn Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_pci_modern.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index db93cedd262f..9bdc6f68221f 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -481,6 +481,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { * @dev: the pci device * @cfg_type: the VIRTIO_PCI_CAP_* value we seek * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO. + * @bars: the bitmask of BARs * * Returns offset of the capability, or 0. */ From 4c05433bc6fb4ae172270f0279be8ba89a3da64f Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 4 Aug 2020 18:21:23 +0800 Subject: [PATCH 52/71] vDPA: dont change vq irq after DRIVER_OK The IRQ of a vq is not expected to change during a DRIVER_OK ~ !DRIVER_OK period, for irq offloading purposes. Place this comment next to the bus op get_vq_irq rather than in set_status in vhost_vdpa. Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20200804102123.69978-1-lingshan.zhu@intel.com Signed-off-by: Michael S.
Tsirkin --- drivers/vhost/vdpa.c | 1 - include/linux/vdpa.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7441b9803eae..f8f8c9cf05b0 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -172,7 +172,6 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) ops->set_status(vdpa, status); - /* vq irq is not expected to be changed once DRIVER_OK is set */ if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) for (i = 0; i < nvqs; i++) vhost_vdpa_setup_vq_irq(v, i); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 5c530a64aa06..565298cb45d2 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -186,6 +186,7 @@ struct vdpa_config_ops { u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); + /* vq irq is not expected to be changed once DRIVER_OK is set */ int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); /* Device ops */ From b0bd82bf729dbd05757e599f20df38dff5b97091 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 4 Aug 2020 19:20:37 +0300 Subject: [PATCH 53/71] vhost-vdpa: refine ioctl pre-processing Switch to use 'switch' to make the codes more easier to be extended. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-2-eli@mellanox.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vdpa.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index f8f8c9cf05b0..01475bec6e0d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -360,15 +360,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, idx = array_index_nospec(idx, v->nvqs); vq = &v->vqs[idx]; - if (cmd == VHOST_VDPA_SET_VRING_ENABLE) { + switch (cmd) { + case VHOST_VDPA_SET_VRING_ENABLE: if (copy_from_user(&s, argp, sizeof(s))) return -EFAULT; ops->set_vq_ready(vdpa, idx, s.num); return 0; - } - - if (cmd == VHOST_GET_VRING_BASE) + case VHOST_GET_VRING_BASE: vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx); + break; + } r = vhost_vring_ioctl(&v->vdev, cmd, argp); if (r) From 460f7ce19f50e612a80b8cd0d2e38f2e14e765f6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 4 Aug 2020 19:20:38 +0300 Subject: [PATCH 54/71] vhost: generialize backend features setting/getting Move the backend features setting/getting from net.c to vhost.c to be reused by vhost-vdpa. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-3-eli@mellanox.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 18 ++---------------- drivers/vhost/vhost.c | 15 +++++++++++++++ drivers/vhost/vhost.h | 2 ++ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8e0921d3805d..bfbbe5c876f9 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1615,21 +1615,6 @@ static long vhost_net_reset_owner(struct vhost_net *n) return err; } -static int vhost_net_set_backend_features(struct vhost_net *n, u64 features) -{ - int i; - - mutex_lock(&n->dev.mutex); - for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { - mutex_lock(&n->vqs[i].vq.mutex); - n->vqs[i].vq.acked_backend_features = features; - mutex_unlock(&n->vqs[i].vq.mutex); - } - mutex_unlock(&n->dev.mutex); - - return 0; -} - static int vhost_net_set_features(struct vhost_net *n, u64 features) { size_t vhost_hlen, sock_hlen, hdr_len; @@ -1730,7 +1715,8 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; if (features & ~VHOST_NET_BACKEND_FEATURES) return -EOPNOTSUPP; - return vhost_net_set_backend_features(n, features); + vhost_set_backend_features(&n->dev, features); + return 0; case VHOST_RESET_OWNER: return vhost_net_reset_owner(n); case VHOST_SET_OWNER: diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8f622064b3e8..5e5cc3dd983e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2591,6 +2591,21 @@ struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_dequeue_msg); +void vhost_set_backend_features(struct vhost_dev *dev, u64 features) +{ + struct vhost_virtqueue *vq; + int i; + + mutex_lock(&dev->mutex); + for (i = 0; i < dev->nvqs; ++i) { + vq = dev->vqs[i]; + mutex_lock(&vq->mutex); + vq->acked_backend_features = features; + mutex_unlock(&vq->mutex); + } + mutex_unlock(&dev->mutex); +} +EXPORT_SYMBOL_GPL(vhost_set_backend_features); static int __init vhost_init(void) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 
38eb1aa3b68d..9032d3c2a9f4 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -214,6 +214,8 @@ void vhost_enqueue_msg(struct vhost_dev *dev, struct vhost_msg_node *node); struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, struct list_head *head); +void vhost_set_backend_features(struct vhost_dev *dev, u64 features); + __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev, poll_table *wait); ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, From 653055b9acd45d602435f2f70b7a85cb3130f018 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 4 Aug 2020 19:20:39 +0300 Subject: [PATCH 55/71] vhost-vdpa: support get/set backend features This patch makes userspace can get and set backend features to vhost-vdpa. Signed-off-by: Cindy Lu Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-4-eli@mellanox.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 01475bec6e0d..61c17d34cb39 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -26,6 +26,10 @@ #include "vhost.h" +enum { + VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) +}; + /* Currently, only network backend w/o multiqueue is supported. 
*/ #define VHOST_VDPA_VQ_MAX 2 @@ -347,6 +351,8 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; + u64 __user *featurep = argp; + u64 features; u32 idx; long r; @@ -369,6 +375,18 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, case VHOST_GET_VRING_BASE: vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx); break; + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VDPA_BACKEND_FEATURES; + if (copy_to_user(featurep, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_BACKEND_FEATURES: + if (copy_from_user(&features, featurep, sizeof(features))) + return -EFAULT; + if (features & ~VHOST_VDPA_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&v->vdev, features); + return 0; } r = vhost_vring_ioctl(&v->vdev, cmd, argp); From 25abc060d282132ea5c945392f900dca0a7e9bbb Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 4 Aug 2020 19:20:40 +0300 Subject: [PATCH 56/71] vhost-vdpa: support IOTLB batching hints This patch extends the vhost IOTLB API to accept batch updating hints from userspace. When userspace wants to update the device IOTLB in a batch, it may do: 1) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_BEGIN flag 2) Perform a batch of IOTLB updates via VHOST_IOTLB_UPDATE/INVALIDATE 3) Write vhost_iotlb_msg with VHOST_IOTLB_BATCH_END flag Vhost-vdpa may decide to batch the IOMMU/IOTLB updating in step 3 when the vDPA device supports the set_map() op. This is useful for vDPA devices that want to know all the mappings in order to tweak their own DMA translation logic. For vDPA devices that don't require set_map(), there is no behavior change. This capability is advertised via the VHOST_BACKEND_F_IOTLB_BATCH feature bit. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-5-eli@mellanox.com Signed-off-by: Michael S.
Tsirkin --- drivers/vhost/vdpa.c | 36 ++++++++++++++++++++++++-------- include/uapi/linux/vhost.h | 2 ++ include/uapi/linux/vhost_types.h | 11 ++++++++++ 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 61c17d34cb39..e80db051845d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -27,7 +27,9 @@ #include "vhost.h" enum { - VHOST_VDPA_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) + VHOST_VDPA_BACKEND_FEATURES = + (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) | + (1ULL << VHOST_BACKEND_F_IOTLB_BATCH), }; /* Currently, only network backend w/o multiqueue is supported. */ @@ -48,6 +50,7 @@ struct vhost_vdpa { int virtio_id; int minor; struct eventfd_ctx *config_ctx; + int in_batch; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -124,6 +127,7 @@ static void vhost_vdpa_reset(struct vhost_vdpa *v) struct vdpa_device *vdpa = v->vdpa; vdpa_reset(vdpa); + v->in_batch = 0; } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -546,13 +550,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, if (r) return r; - if (ops->dma_map) + if (ops->dma_map) { r = ops->dma_map(vdpa, iova, size, pa, perm); - else if (ops->set_map) - r = ops->set_map(vdpa, dev->iotlb); - else + } else if (ops->set_map) { + if (!v->in_batch) + r = ops->set_map(vdpa, dev->iotlb); + } else { r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); + } return r; } @@ -565,12 +571,14 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); - if (ops->dma_map) + if (ops->dma_map) { ops->dma_unmap(vdpa, iova, size); - else if (ops->set_map) - ops->set_map(vdpa, dev->iotlb); - else + } else if (ops->set_map) { + if (!v->in_batch) + ops->set_map(vdpa, dev->iotlb); + } else { iommu_unmap(v->domain, iova, size); + } } static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, @@ -663,6 +671,8 @@ static int 
vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, struct vhost_iotlb_msg *msg) { struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; int r = 0; r = vhost_dev_check_owner(dev); @@ -676,6 +686,14 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, case VHOST_IOTLB_INVALIDATE: vhost_vdpa_unmap(v, msg->iova, msg->size); break; + case VHOST_IOTLB_BATCH_BEGIN: + v->in_batch = true; + break; + case VHOST_IOTLB_BATCH_END: + if (v->in_batch && ops->set_map) + ops->set_map(vdpa, dev->iotlb); + v->in_batch = false; + break; default: r = -EINVAL; break; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 0c2349612e77..75232185324a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -91,6 +91,8 @@ /* Use message type V2 */ #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 669457ce5c48..9a269a88a6ff 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -60,6 +60,17 @@ struct vhost_iotlb_msg { #define VHOST_IOTLB_UPDATE 2 #define VHOST_IOTLB_INVALIDATE 3 #define VHOST_IOTLB_ACCESS_FAIL 4 +/* + * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying + * multiple mappings in one go: beginning with + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of + * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END. + * When one of these two values is used as the message type, the rest + * of the fields in the message are ignored. There's no guarantee that + * these changes take place automatically in the device. 
+ */ +#define VHOST_IOTLB_BATCH_BEGIN 5 +#define VHOST_IOTLB_BATCH_END 6 __u8 type; }; From de91a4d0e725db34db64502fad84e8fb1026146b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 4 Aug 2020 19:20:41 +0300 Subject: [PATCH 57/71] vdpasim: support batch updating The vDPA simulator supports both set_map() and dma_map()/dma_unmap() operations, but vhost-vdpa can only use one of them. So this patch introduces a module parameter (batch_mapping) that lets vdpa_sim support only one of those DMA operations. The batched mapping via set_map() is enabled by default. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-6-eli@mellanox.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 40 +++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 2a55b463e64c..7b34d663778f 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -34,6 +34,10 @@ #define DRV_DESC "vDPA Device Simulator" #define DRV_LICENSE "GPL v2" +static int batch_mapping = 1; +module_param(batch_mapping, int, 0444); +MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); + struct vdpasim_virtqueue { struct vringh vring; struct vringh_kiov iov; @@ -334,15 +338,21 @@ static const struct dma_map_ops vdpasim_dma_ops = { }; static const struct vdpa_config_ops vdpasim_net_config_ops; +static const struct vdpa_config_ops vdpasim_net_batch_config_ops; static struct vdpasim *vdpasim_create(void) { + const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; int ret = -ENOMEM; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, - &vdpasim_net_config_ops); + if (batch_mapping) + ops = &vdpasim_net_batch_config_ops; + else + ops = &vdpasim_net_config_ops; + + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops); if (!vdpasim) goto err_alloc; @@ -643,12 +653,36 @@ static
const struct vdpa_config_ops vdpasim_net_config_ops = { .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, .get_generation = vdpasim_get_generation, - .set_map = vdpasim_set_map, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, .free = vdpasim_free, }; +static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_features = vdpasim_get_features, + .set_features = vdpasim_set_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .set_map = vdpasim_set_map, + .free = vdpasim_free, +}; + static int __init vdpasim_dev_init(void) { vdpasim_dev = vdpasim_create(); From a9974489b61c09c702c85c6cba3d1a3fd1be7a15 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 4 Aug 2020 19:20:42 +0300 Subject: [PATCH 58/71] vdpa: remove hard coded virtq num This will enable vdpa providers to add support for multi queue feature and publish it to upper layers (vhost and virtio). Signed-off-by: Max Gurtovoy Reviewed-by: Jason Wang Link: https://lore.kernel.org/r/20200804162048.22587-7-eli@mellanox.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 3 ++- drivers/vdpa/vdpa.c | 3 +++ drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 ++-- drivers/vhost/vdpa.c | 9 +++------ include/linux/vdpa.h | 6 ++++-- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index a902b29b0d29..7c93225367db 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -431,7 +431,8 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops); + dev, &ifc_vdpa_ops, + IFCVF_MAX_QUEUE_PAIRS * 2); if (adapter == NULL) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return -ENOMEM; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 7105265e4793..a69ffc991e13 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -61,6 +61,7 @@ static void vdpa_release_dev(struct device *d) * initialized but before registered. 
* @parent: the parent device * @config: the bus operations that is supported by this device + * @nvqs: number of virtqueues supported by this device * @size: size of the parent structure that contains private data * * Driver should use vdpa_alloc_device() wrapper macro instead of @@ -71,6 +72,7 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + int nvqs, size_t size) { struct vdpa_device *vdev; @@ -97,6 +99,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->index = err; vdev->config = config; vdev->features_valid = false; + vdev->nvqs = nvqs; err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); if (err) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 7b34d663778f..58baff89cc29 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -65,7 +65,7 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; - struct vdpasim_virtqueue vqs[2]; + struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM]; struct work_struct work; /* spinlock to synchronize virtqueue state */ spinlock_t lock; @@ -352,7 +352,7 @@ static struct vdpasim *vdpasim_create(void) else ops = &vdpasim_net_config_ops; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops); + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); if (!vdpasim) goto err_alloc; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e80db051845d..2d8c950ad3a8 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -32,9 +32,6 @@ enum { (1ULL << VHOST_BACKEND_F_IOTLB_BATCH), }; -/* Currently, only network backend w/o multiqueue is supported. 
*/ -#define VHOST_VDPA_VQ_MAX 2 - #define VHOST_VDPA_DEV_MAX (1U << MINORBITS) struct vhost_vdpa { @@ -930,7 +927,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) { const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa *v; - int minor, nvqs = VHOST_VDPA_VQ_MAX; + int minor; int r; /* Currently, we only accept the network devices. */ @@ -951,14 +948,14 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) atomic_set(&v->opened, 0); v->minor = minor; v->vdpa = vdpa; - v->nvqs = nvqs; + v->nvqs = vdpa->nvqs; v->virtio_id = ops->get_device_id(vdpa); device_initialize(&v->dev); v->dev.release = vhost_vdpa_release_dev; v->dev.parent = &vdpa->dev; v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor); - v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue), + v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue), GFP_KERNEL); if (!v->vqs) { r = -ENOMEM; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 565298cb45d2..b5901cde73e0 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -41,6 +41,7 @@ struct vdpa_device { const struct vdpa_config_ops *config; unsigned int index; bool features_valid; + int nvqs; }; /** @@ -218,11 +219,12 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, + int nvqs, size_t size); -#define vdpa_alloc_device(dev_struct, member, parent, config) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs) \ container_of(__vdpa_alloc_device( \ - parent, config, \ + parent, config, nvqs, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ dev_struct, member))), \ From aac50c0bd434794b9950181349099e709ca4edad Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 4 Aug 2020 19:20:43 +0300 Subject: [PATCH 59/71] net/vdpa: Use struct for set/get vq state For now VQ state involves 16 bit available index value encoded in u64 variable. In the future it will be extended to contain more fields. 
Use struct to contain the state, now containing only a single u16 for the available index. In the future we can add fields to this struct. Reviewed-by: Parav Pandit Acked-by: Jason Wang Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200804162048.22587-8-eli@mellanox.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_base.c | 4 ++-- drivers/vdpa/ifcvf/ifcvf_base.h | 4 ++-- drivers/vdpa/ifcvf/ifcvf_main.c | 9 +++++---- drivers/vdpa/vdpa_sim/vdpa_sim.c | 10 ++++++---- drivers/vhost/vdpa.c | 7 +++++-- include/linux/vdpa.h | 18 ++++++++++++++---- 6 files changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 94bf0328b68d..f2a128e56de5 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -272,7 +272,7 @@ static int ifcvf_config_features(struct ifcvf_hw *hw) return 0; } -u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) +u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) { struct ifcvf_lm_cfg __iomem *ifcvf_lm; void __iomem *avail_idx_addr; @@ -287,7 +287,7 @@ u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) return last_avail_idx; } -int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num) +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) { struct ifcvf_lm_cfg __iomem *ifcvf_lm; void __iomem *avail_idx_addr; diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 24af422b5a3e..08f267a2aafe 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -116,7 +116,7 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); void io_write64_twopart(u64 val, u32 *lo, u32 *hi); void ifcvf_reset(struct ifcvf_hw *hw); u64 ifcvf_get_features(struct ifcvf_hw *hw); -u64 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); -int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u64 num); +u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); +int ifcvf_set_vq_state(struct 
ifcvf_hw *hw, u16 qid, u16 num); struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 7c93225367db..dc311e972b9e 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -237,19 +237,20 @@ static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) return IFCVF_QUEUE_MAX; } -static u64 ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid) +static void ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_vq_state *state) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return ifcvf_get_vq_state(vf, qid); + state->avail_index = ifcvf_get_vq_state(vf, qid); } static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, - u64 num) + const struct vdpa_vq_state *state) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return ifcvf_set_vq_state(vf, qid, num); + return ifcvf_set_vq_state(vf, qid, state->avail_index); } static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 58baff89cc29..c93126ad09d1 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -450,26 +450,28 @@ static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) return vq->ready; } -static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, u64 state) +static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, + const struct vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; spin_lock(&vdpasim->lock); - vrh->last_avail_idx = state; + vrh->last_avail_idx = state->avail_index; spin_unlock(&vdpasim->lock); return 0; } -static u64 vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx) +static void vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, + struct 
vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; - return vrh->last_avail_idx; + state->avail_index = vrh->last_avail_idx; } static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 2d8c950ad3a8..066b165c17b1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -349,6 +349,7 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_vq_state vq_state; struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; @@ -374,7 +375,8 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, ops->set_vq_ready(vdpa, idx, s.num); return 0; case VHOST_GET_VRING_BASE: - vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx); + ops->get_vq_state(v->vdpa, idx, &vq_state); + vq->last_avail_idx = vq_state.avail_index; break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; @@ -404,7 +406,8 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: - if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx)) + vq_state.avail_index = vq->last_avail_idx; + if (ops->set_vq_state(vdpa, idx, &vq_state)) r = -EINVAL; break; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index b5901cde73e0..d7399c983734 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -27,6 +27,14 @@ struct vdpa_notification_area { resource_size_t size; }; +/** + * vDPA vq_state definition + * @avail_index: available index + */ +struct vdpa_vq_state { + u16 avail_index; +}; + /** * vDPA device - representation of a vDPA device * @dev: underlying device @@ -80,12 +88,12 @@ struct vdpa_device { * @set_vq_state: Set the state for a virtqueue * @vdev: vdpa device * @idx: virtqueue index - * @state: 
virtqueue state (last_avail_idx) + * @state: pointer to set virtqueue state (last_avail_idx) * Returns integer: success (0) or error (< 0) * @get_vq_state: Get the state for a virtqueue * @vdev: vdpa device * @idx: virtqueue index - * Returns virtqueue state (last_avail_idx) + * @state: pointer to returned state (last_avail_idx) * @get_vq_notification: Get the notification area for a virtqueue * @vdev: vdpa device * @idx: virtqueue index @@ -183,8 +191,10 @@ struct vdpa_config_ops { struct vdpa_callback *cb); void (*set_vq_ready)(struct vdpa_device *vdev, u16 idx, bool ready); bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx); - int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state); - u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); + int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, + const struct vdpa_vq_state *state); + void (*get_vq_state)(struct vdpa_device *vdev, u16 idx, + struct vdpa_vq_state *state); struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ From 23750e39d57433d0e3d89658f0bc448f9c42ff49 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 4 Aug 2020 19:20:44 +0300 Subject: [PATCH 60/71] vdpa: Modify get_vq_state() to return error code Modify get_vq_state() so it returns an error code. In case of hardware acceleration, the available index may be retrieved from the device, an operation that can possibly fail. Reviewed-by: Parav Pandit Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200804162048.22587-9-eli@mellanox.com Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_main.c | 5 +++-- drivers/vdpa/vdpa_sim/vdpa_sim.c | 5 +++-- drivers/vhost/vdpa.c | 5 ++++- include/linux/vdpa.h | 4 ++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index dc311e972b9e..076d7ac5e723 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -237,12 +237,13 @@ static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) return IFCVF_QUEUE_MAX; } -static void ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, - struct vdpa_vq_state *state) +static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_vq_state *state) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); state->avail_index = ifcvf_get_vq_state(vf, qid); + return 0; } static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c93126ad09d1..df3224b138ee 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -464,14 +464,15 @@ static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, return 0; } -static void vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, - struct vdpa_vq_state *state) +static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, + struct vdpa_vq_state *state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; struct vringh *vrh = &vq->vring; state->avail_index = vrh->last_avail_idx; + return 0; } static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 066b165c17b1..3fab94f88894 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -375,7 +375,10 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, ops->set_vq_ready(vdpa, idx, s.num); return 0; case 
VHOST_GET_VRING_BASE: - ops->get_vq_state(v->vdpa, idx, &vq_state); + r = ops->get_vq_state(v->vdpa, idx, &vq_state); + if (r) + return r; + vq->last_avail_idx = vq_state.avail_index; break; case VHOST_GET_BACKEND_FEATURES: diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index d7399c983734..eae0bfd87d91 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -193,8 +193,8 @@ struct vdpa_config_ops { bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx); int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, const struct vdpa_vq_state *state); - void (*get_vq_state)(struct vdpa_device *vdev, u16 idx, - struct vdpa_vq_state *state); + int (*get_vq_state)(struct vdpa_device *vdev, u16 idx, + struct vdpa_vq_state *state); struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ From 89349be659d63767cf79e23767da84408a33cd73 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 4 Aug 2020 19:20:45 +0300 Subject: [PATCH 61/71] vdpa/mlx5: Add hardware descriptive header file Keep all vdpa related hardware definitions in this file. Reviewed-by: Parav Pandit Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200804162048.22587-10-eli@mellanox.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h | 168 +++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h new file mode 100644 index 000000000000..f6f57a29b38e --- /dev/null +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#ifndef __MLX5_VDPA_IFC_H_ +#define __MLX5_VDPA_IFC_H_ + +#include + +enum { + MLX5_VIRTIO_Q_EVENT_MODE_NO_MSIX_MODE = 0x0, + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE = 0x1, + MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, +}; + +struct mlx5_ifc_virtio_q_bits { + u8 virtio_q_type[0x8]; + u8 reserved_at_8[0x5]; + u8 event_mode[0x3]; + u8 queue_index[0x10]; + + u8 full_emulation[0x1]; + u8 virtio_version_1_0[0x1]; + u8 reserved_at_22[0x2]; + u8 offload_type[0x4]; + u8 event_qpn_or_msix[0x18]; + + u8 doorbell_stride_index[0x10]; + u8 queue_size[0x10]; + + u8 device_emulation_id[0x20]; + + u8 desc_addr[0x40]; + + u8 used_addr[0x40]; + + u8 available_addr[0x40]; + + u8 virtio_q_mkey[0x20]; + + u8 max_tunnel_desc[0x10]; + u8 reserved_at_170[0x8]; + u8 error_type[0x8]; + + u8 umem_1_id[0x20]; + + u8 umem_1_size[0x20]; + + u8 umem_1_offset[0x40]; + + u8 umem_2_id[0x20]; + + u8 umem_2_size[0x20]; + + u8 umem_2_offset[0x40]; + + u8 umem_3_id[0x20]; + + u8 umem_3_size[0x20]; + + u8 umem_3_offset[0x40]; + + u8 counter_set_id[0x20]; + + u8 reserved_at_320[0x8]; + u8 pd[0x18]; + + u8 reserved_at_340[0xc0]; +}; + +struct mlx5_ifc_virtio_net_q_object_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x20]; + + u8 vhca_id[0x10]; + u8 reserved_at_70[0x10]; + + u8 queue_feature_bit_mask_12_3[0xa]; + u8 dirty_bitmap_dump_enable[0x1]; + u8 vhost_log_page[0x5]; + u8 reserved_at_90[0xc]; + u8 state[0x4]; + + u8 reserved_at_a0[0x5]; + u8 queue_feature_bit_mask_2_0[0x3]; + u8 tisn_or_qpn[0x18]; + + u8 dirty_bitmap_mkey[0x20]; + + u8 dirty_bitmap_size[0x20]; + + u8 dirty_bitmap_addr[0x40]; + + u8 hw_available_index[0x10]; + u8 hw_used_index[0x10]; + + u8 reserved_at_160[0xa0]; + + struct mlx5_ifc_virtio_q_bits 
virtio_q_context; +}; + +struct mlx5_ifc_create_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_create_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +enum { + MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, +}; + +enum { + MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT = 0x0, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY = 0x1, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND = 0x2, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, +}; + +enum { + MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, + MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, +}; + +struct mlx5_ifc_modify_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_modify_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +#endif /* __MLX5_VDPA_IFC_H_ */ From 29064bfdabd5ef49eac6909d3a36a075e3b52255 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 4 Aug 2020 19:20:46 +0300 Subject: [PATCH 62/71] vdpa/mlx5: Add support library for mlx5 VDPA implementation Following patches introduce VDPA network driver for Mellanox Connectx6 devices. 
This patch provides functionality that will be used by those patches. Reviewed-by: Parav Pandit Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200804162048.22587-11-eli@mellanox.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/Kconfig | 9 + drivers/vdpa/Makefile | 1 + drivers/vdpa/mlx5/Makefile | 1 + drivers/vdpa/mlx5/core/Makefile | 1 + drivers/vdpa/mlx5/core/mlx5_vdpa.h | 57 ++++++ drivers/vdpa/mlx5/core/resources.c | 281 +++++++++++++++++++++++++++++ 6 files changed, 350 insertions(+) create mode 100644 drivers/vdpa/mlx5/Makefile create mode 100644 drivers/vdpa/mlx5/core/Makefile create mode 100644 drivers/vdpa/mlx5/core/mlx5_vdpa.h create mode 100644 drivers/vdpa/mlx5/core/resources.c diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 3e1ceb8e9f2b..7cb84f82feba 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -28,4 +28,13 @@ config IFCVF To compile this driver as a module, choose M here: the module will be called ifcvf. +config MLX5_VDPA + bool "MLX5 VDPA support library for ConnectX devices" + depends on MLX5_CORE + default n + help + Support library for Mellanox VDPA drivers. Provides code that is + common for all types of VDPA drivers. The following drivers are planned: + net, block. 
+ endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 8bbb686ca7a2..d160e9b63a66 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ obj-$(CONFIG_IFCVF) += ifcvf/ +obj-$(CONFIG_MLX5_VDPA) += mlx5/ diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile new file mode 100644 index 000000000000..d552abb1d126 --- /dev/null +++ b/drivers/vdpa/mlx5/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MLX5_VDPA) += core/resources.o diff --git a/drivers/vdpa/mlx5/core/Makefile b/drivers/vdpa/mlx5/core/Makefile new file mode 100644 index 000000000000..7070f8c8680d --- /dev/null +++ b/drivers/vdpa/mlx5/core/Makefile @@ -0,0 +1 @@ +obj-y += resources.o diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h new file mode 100644 index 000000000000..f3571c8b257e --- /dev/null +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#ifndef __MLX5_VDPA_H__ +#define __MLX5_VDPA_H__ + +#include +#include + +struct mlx5_vdpa_resources { + u32 pdn; + struct mlx5_uars_page *uar; + void __iomem *kick_addr; + u16 uid; + u32 null_mkey; + bool valid; +}; + +struct mlx5_vdpa_dev { + struct vdpa_device vdev; + struct mlx5_core_dev *mdev; + struct mlx5_vdpa_resources res; + + u64 mlx_features; + u64 actual_features; + u8 status; + u32 max_vqs; + u32 generation; +}; + +int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); +int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid); +int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); +int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); +void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); +int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); +void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn); +int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn); +void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn); +int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn); +void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn); +int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); + +#define mlx5_vdpa_warn(__dev, format, ...) \ + dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#define mlx5_vdpa_info(__dev, format, ...) \ + dev_info((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#define mlx5_vdpa_dbg(__dev, format, ...) 
\ + dev_dbg((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#endif /* __MLX5_VDPA_H__ */ diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c new file mode 100644 index 000000000000..6c6552b7e9b5 --- /dev/null +++ b/drivers/vdpa/mlx5/core/resources.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include +#include "mlx5_vdpa.h" + +static int alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + int err; + + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + MLX5_SET(alloc_pd_in, in, uid, uid); + + err = mlx5_cmd_exec_inout(mdev, alloc_pd, in, out); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); + + return err; +} + +static int dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + struct mlx5_core_dev *mdev = dev->mdev; + + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + MLX5_SET(dealloc_pd_in, in, uid, uid); + return mlx5_cmd_exec_in(mdev, dealloc_pd, in); +} + +static int get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + struct mlx5_core_dev *mdev = dev->mdev; + int err; + + MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec_inout(mdev, query_special_contexts, in, out); + if (!err) + *null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey); + return err; +} + +static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid) +{ + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; + int inlen; + void *in; + int err; + + /* 0 means not supported */ + if
(!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx)) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(create_uctx_in); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(create_uctx_in, in, uctx.cap, MLX5_UCTX_CAP_RAW_TX); + + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (!err) + *uid = MLX5_GET(create_uctx_out, out, uid); + + return err; +} + +static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid) +{ + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); + + mlx5_cmd_exec(mvdev->mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {}; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + MLX5_SET(create_tis_in, in, uid, mvdev->res.uid); + err = mlx5_cmd_exec_inout(mvdev->mdev, create_tis, in, out); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} + +void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_tis, in); +} + +int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {}; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} + +void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn) +{ + u32 
in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_rqt, in); +} + +int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + err = mlx5_cmd_exec_inout(mvdev->mdev, create_tir, in, out); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} + +void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_tir, in); +} + +int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + int err; + + MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, mvdev->res.uid); + + err = mlx5_cmd_exec_inout(mvdev->mdev, alloc_transport_domain, in, out); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); + + return err; +} + +void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; + + MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, mvdev->res.uid); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in); +} + +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, 
u32 *in, + int inlen) +{ + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; + u32 mkey_index; + void *mkc; + int err; + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, lout, sizeof(lout)); + if (err) + return err; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->key |= mlx5_idx_to_mkey(mkey_index); + mkey->pd = MLX5_GET(mkc, mkc, pd); + return 0; +} + +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey) +{ + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; + + MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); +} + +int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) +{ + u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset); + struct mlx5_vdpa_resources *res = &mvdev->res; + struct mlx5_core_dev *mdev = mvdev->mdev; + u64 kick_addr; + int err; + + if (res->valid) { + mlx5_vdpa_warn(mvdev, "resources already allocated\n"); + return -EINVAL; + } + res->uar = mlx5_get_uars_page(mdev); + if (IS_ERR(res->uar)) { + err = PTR_ERR(res->uar); + goto err_uars; + } + + err = create_uctx(mvdev, &res->uid); + if (err) + goto err_uctx; + + err = alloc_pd(mvdev, &res->pdn, res->uid); + if (err) + goto err_pd; + + err = get_null_mkey(mvdev, &res->null_mkey); + if (err) + goto err_key; + + kick_addr = pci_resource_start(mdev->pdev, 0) + offset; + res->kick_addr = ioremap(kick_addr, PAGE_SIZE); + if (!res->kick_addr) { + err = -ENOMEM; + goto err_key; + } + res->valid = true; + + return 0; + +err_key: + dealloc_pd(mvdev, res->pdn, 
res->uid); +err_pd: + destroy_uctx(mvdev, res->uid); +err_uctx: + mlx5_put_uars_page(mdev, res->uar); +err_uars: + return err; +} + +void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_resources *res = &mvdev->res; + + if (!res->valid) + return; + + iounmap(res->kick_addr); + res->kick_addr = NULL; + dealloc_pd(mvdev, res->pdn, res->uid); + destroy_uctx(mvdev, res->uid); + mlx5_put_uars_page(mvdev->mdev, res->uar); + res->valid = false; +} From 94abbccdf2916cb03f9626f2d36c6e9971490c12 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 4 Aug 2020 19:20:47 +0300 Subject: [PATCH 63/71] vdpa/mlx5: Add shared memory registration code Add code to support registering address space region for the device. The virtio driver can run as either: 1. Guest virtio driver 2. Userspace virtio driver on the host 3. Kernel virtio driver on the host In any case a memory key object is required to provide access to memory for the device. This code will be shared by network or block driver implementations. Reviewed-by: Parav Pandit Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200804162048.22587-12-eli@mellanox.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/Makefile | 2 +- drivers/vdpa/mlx5/core/Makefile | 1 - drivers/vdpa/mlx5/core/mlx5_vdpa.h | 34 ++ drivers/vdpa/mlx5/core/mr.c | 484 +++++++++++++++++++++++++++++ drivers/vdpa/mlx5/core/resources.c | 3 + 5 files changed, 522 insertions(+), 2 deletions(-) delete mode 100644 drivers/vdpa/mlx5/core/Makefile create mode 100644 drivers/vdpa/mlx5/core/mr.c diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile index d552abb1d126..b347c62032ea 100644 --- a/drivers/vdpa/mlx5/Makefile +++ b/drivers/vdpa/mlx5/Makefile @@ -1 +1 @@ -obj-$(CONFIG_MLX5_VDPA) += core/resources.o +obj-$(CONFIG_MLX5_VDPA) += core/resources.o core/mr.o diff --git a/drivers/vdpa/mlx5/core/Makefile b/drivers/vdpa/mlx5/core/Makefile deleted file mode 100644 index 7070f8c8680d..000000000000 --- a/drivers/vdpa/mlx5/core/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y += resources.o diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index f3571c8b257e..5c92a576edae 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -7,6 +7,31 @@ #include #include +struct mlx5_vdpa_direct_mr { + u64 start; + u64 end; + u32 perm; + struct mlx5_core_mkey mr; + struct sg_table sg_head; + int log_size; + int nsg; + struct list_head list; + u64 offset; +}; + +struct mlx5_vdpa_mr { + struct mlx5_core_mkey mkey; + + /* list of direct MRs descendants of this indirect mr */ + struct list_head head; + unsigned long num_directs; + unsigned long num_klms; + bool initialized; + + /* serialize mkey creation and destruction */ + struct mutex mkey_mtx; +}; + struct mlx5_vdpa_resources { u32 pdn; struct mlx5_uars_page *uar; @@ -26,6 +51,8 @@ struct mlx5_vdpa_dev { u8 status; u32 max_vqs; u32 generation; + + struct mlx5_vdpa_mr mr; }; int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); @@ -41,6 +68,13 @@ int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn); void 
mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn); int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev); void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, + int inlen); +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey); +int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + bool *change_map); +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); #define mlx5_vdpa_warn(__dev, format, ...) \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c new file mode 100644 index 000000000000..084698975c47 --- /dev/null +++ b/drivers/vdpa/mlx5/core/mr.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#include +#include +#include +#include +#include "mlx5_vdpa.h" + +/* DIV_ROUND_UP where the divider is a power of 2 given by its log base 2 value */ +#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \ +({ \ + u64 __s = _s; \ + u64 _res; \ + _res = (((_n) + (1 << (__s)) - 1) >> (__s)); \ + _res; \ +}) + +static int get_octo_len(u64 len, int page_shift) +{ + u64 page_size = 1ULL << page_shift; + int npages; + + npages = ALIGN(len, page_size) >> page_shift; + return (npages + 1) / 2; +} + +static void fill_sg(struct mlx5_vdpa_direct_mr *mr, void *in) +{ + struct scatterlist *sg; + __be64 *pas; + int i; + + pas = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) + (*pas) = cpu_to_be64(sg_dma_address(sg)); +} + +static void mlx5_set_access_mode(void *mkc, int mode) +{ + MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2); +} + +static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) +{ + struct scatterlist *sg; + int i; + + for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) + mtt[i] = cpu_to_be64(sg_dma_address(sg)); +} + +static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + int inlen; + void *mkc; + void *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + fill_sg(mr, in); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO)); + MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO)); + mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); + MLX5_SET64(mkc, mkc, start_addr, mr->offset); + MLX5_SET64(mkc, mkc, len, mr->end - mr->start); + MLX5_SET(mkc, mkc, log_page_size, mr->log_size); + MLX5_SET(mkc, mkc, 
translations_octword_size, + get_octo_len(mr->end - mr->start, mr->log_size)); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(mr->end - mr->start, mr->log_size)); + populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); + err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); + kvfree(in); + if (err) { + mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); + return err; + } + + return 0; +} + +static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + mlx5_vdpa_destroy_mkey(mvdev, &mr->mr); +} + +static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return max_t(u64, map->start, mr->start); +} + +static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return min_t(u64, map->last + 1, mr->end); +} + +static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return map_end(map, mr) - map_start(map, mr); +} + +#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1) +#define MLX5_VDPA_INVALID_LEN ((u64)-1) + +static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey) +{ + struct mlx5_vdpa_direct_mr *s; + + s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); + if (!s) + return MLX5_VDPA_INVALID_START_ADDR; + + return s->start; +} + +static u64 indir_len(struct mlx5_vdpa_mr *mkey) +{ + struct mlx5_vdpa_direct_mr *s; + struct mlx5_vdpa_direct_mr *e; + + s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); + if (!s) + return MLX5_VDPA_INVALID_LEN; + + e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list); + + return e->end - s->start; +} + +#define LOG_MAX_KLM_SIZE 30 +#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE) + +static u32 klm_bcount(u64 size) +{ + return (u32)size; +} + +static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in) +{ + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_klm *klmarr; + struct mlx5_klm *klm; + bool 
first = true; + u64 preve; + int i; + + klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + i = 0; + list_for_each_entry(dmr, &mkey->head, list) { +again: + klm = &klmarr[i++]; + if (first) { + preve = dmr->start; + first = false; + } + + if (preve == dmr->start) { + klm->key = cpu_to_be32(dmr->mr.key); + klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); + preve = dmr->end; + } else { + klm->key = cpu_to_be32(mvdev->res.null_mkey); + klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve)); + preve = dmr->start; + goto again; + } + } +} + +static int klm_byte_size(int nklms) +{ + return 16 * ALIGN(nklms, 4); +} + +static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + int inlen; + void *mkc; + void *in; + int err; + u64 start; + u64 len; + + start = indir_start_addr(mr); + len = indir_len(mr); + if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN) + return -EINVAL; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); + MLX5_SET64(mkc, mkc, start_addr, start); + MLX5_SET64(mkc, mkc, len, len); + MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms); + fill_indir(mvdev, mr, in); + err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen); + kfree(in); + return err; +} + +static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey) +{ + mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey); +} + +static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr, 
+ struct vhost_iotlb *iotlb) +{ + struct vhost_iotlb_map *map; + unsigned long lgcd = 0; + int log_entity_size; + unsigned long size; + u64 start = 0; + int err; + struct page *pg; + unsigned int nsg; + int sglen; + u64 pa; + u64 paend; + struct scatterlist *sg; + struct device *dma = mvdev->mdev->device; + int ret; + + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { + size = maplen(map, mr); + lgcd = gcd(lgcd, size); + start += size; + } + log_entity_size = ilog2(lgcd); + + sglen = 1 << log_entity_size; + nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size); + + err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL); + if (err) + return err; + + sg = mr->sg_head.sgl; + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { + paend = map->addr + maplen(map, mr); + for (pa = map->addr; pa < paend; pa += sglen) { + pg = pfn_to_page(__phys_to_pfn(pa)); + if (!sg) { + mlx5_vdpa_warn(mvdev, "sg null. 
start 0x%llx, end 0x%llx\n", + map->start, map->last + 1); + err = -ENOMEM; + goto err_map; + } + sg_set_page(sg, pg, sglen, 0); + sg = sg_next(sg); + if (!sg) + goto done; + } + } +done: + mr->log_size = log_entity_size; + mr->nsg = nsg; + ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + if (!ret) + goto err_map; + + err = create_direct_mr(mvdev, mr); + if (err) + goto err_direct; + + return 0; + +err_direct: + dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); +err_map: + sg_free_table(&mr->sg_head); + return err; +} + +static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + struct device *dma = mvdev->mdev->device; + + destroy_direct_mr(mvdev, mr); + dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + sg_free_table(&mr->sg_head); +} + +static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, + struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + LIST_HEAD(tmp); + u64 st; + u64 sz; + int err; + int i = 0; + + st = start; + while (size) { + sz = (u32)min_t(u64, MAX_KLM_SIZE, size); + dmr = kzalloc(sizeof(*dmr), GFP_KERNEL); + if (!dmr) + goto err_alloc; + + dmr->start = st; + dmr->end = st + sz; + dmr->perm = perm; + err = map_direct_mr(mvdev, dmr, iotlb); + if (err) { + kfree(dmr); + goto err_alloc; + } + + list_add_tail(&dmr->list, &tmp); + size -= sz; + mr->num_directs++; + mr->num_klms++; + st += sz; + i++; + } + list_splice_tail(&tmp, &mr->head); + return 0; + +err_alloc: + list_for_each_entry_safe(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + return err; +} + +/* The iotlb pointer contains a list of maps. Go over the maps, possibly + * merging mergeable maps, and create direct memory keys that provide the + * device access to memory. 
The direct mkeys are then referred to by the + * indirect memory key that provides access to the entire address space given + * by iotlb. + */ +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + struct vhost_iotlb_map *map; + u32 pperm = U16_MAX; + u64 last = U64_MAX; + u64 ps = U64_MAX; + u64 pe = U64_MAX; + u64 start = 0; + int err = 0; + int nnuls; + + if (mr->initialized) + return 0; + + INIT_LIST_HEAD(&mr->head); + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + start = map->start; + if (pe == map->start && pperm == map->perm) { + pe = map->last + 1; + } else { + if (ps != U64_MAX) { + if (pe < map->start) { + /* We have a hole in the map. Check how + * many null keys are required to fill it. + */ + nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe, + LOG_MAX_KLM_SIZE); + mr->num_klms += nnuls; + } + err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + } + ps = map->start; + pe = map->last + 1; + pperm = map->perm; + } + } + err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + + /* Create the memory key that defines the guest's address space. 
This + * memory key refers to the direct keys that contain the MTT + * translations + */ + err = create_indirect_key(mvdev, mr); + if (err) + goto err_chain; + + mr->initialized = true; + return 0; + +err_chain: + list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + return err; +} + +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err; + + mutex_lock(&mr->mkey_mtx); + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mr->mkey_mtx); + return err; +} + +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + + mutex_lock(&mr->mkey_mtx); + if (!mr->initialized) + goto out; + + destroy_indirect_key(mvdev, mr); + list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + memset(mr, 0, sizeof(*mr)); + mr->initialized = false; +out: + mutex_unlock(&mr->mkey_mtx); +} + +static bool map_empty(struct vhost_iotlb *iotlb) +{ + return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX); +} + +int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + bool *change_map) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err; + + *change_map = false; + if (map_empty(iotlb)) { + mlx5_vdpa_destroy_mr(mvdev); + return 0; + } + mutex_lock(&mr->mkey_mtx); + if (mr->initialized) { + mlx5_vdpa_info(mvdev, "memory map update\n"); + *change_map = true; + } + if (!*change_map) + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mr->mkey_mtx); + + return err; +} diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index 6c6552b7e9b5..96e6421c5d1c 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -227,6 +227,7 @@ int 
mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) mlx5_vdpa_warn(mvdev, "resources already allocated\n"); return -EINVAL; } + mutex_init(&mvdev->mr.mkey_mtx); res->uar = mlx5_get_uars_page(mdev); if (IS_ERR(res->uar)) { err = PTR_ERR(res->uar); @@ -262,6 +263,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) err_uctx: mlx5_put_uars_page(mdev, res->uar); err_uars: + mutex_destroy(&mvdev->mr.mkey_mtx); return err; } @@ -277,5 +279,6 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) dealloc_pd(mvdev, res->pdn, res->uid); destroy_uctx(mvdev, res->uid); mlx5_put_uars_page(mvdev->mdev, res->uar); + mutex_destroy(&mvdev->mr.mkey_mtx); res->valid = false; } From 1a86b377aa2147a7c866b03142e848c18e5f3cb8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 4 Aug 2020 19:20:48 +0300 Subject: [PATCH 64/71] vdpa/mlx5: Add VDPA driver for supported mlx5 devices Add a front end VDPA driver that registers in the VDPA bus and provides networking to a guest. The VDPA driver creates the necessary resources on the VF it is driving such that data path will be offloaded. Notifications are being communicated through the driver. Currently, only VFs are supported. In subsequent patches we will have devlink support to control which VF is used for VDPA and which function is used for regular networking. Reviewed-by: Parav Pandit Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200804162048.22587-13-eli@mellanox.com Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/Kconfig | 10 + drivers/vdpa/mlx5/Makefile | 5 +- drivers/vdpa/mlx5/core/mr.c | 2 +- drivers/vdpa/mlx5/net/main.c | 76 ++ drivers/vdpa/mlx5/net/mlx5_vnet.c | 1965 +++++++++++++++++++++++++++++ drivers/vdpa/mlx5/net/mlx5_vnet.h | 24 + 6 files changed, 2080 insertions(+), 2 deletions(-) create mode 100644 drivers/vdpa/mlx5/net/main.c create mode 100644 drivers/vdpa/mlx5/net/mlx5_vnet.c create mode 100644 drivers/vdpa/mlx5/net/mlx5_vnet.h diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 7cb84f82feba..a8c7607fdc90 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -37,4 +37,14 @@ config MLX5_VDPA common for all types of VDPA drivers. The following drivers are planned: net, block. +config MLX5_VDPA_NET + tristate "vDPA driver for ConnectX devices" + depends on MLX5_VDPA + default n + help + VDPA network driver for ConnectX6 and newer. Provides offloading + of virtio net datapath such that descriptors put on the ring will + be executed by the hardware. It also supports a variety of stateless + offloads depending on the actual device used and firmware version. 
+ endif # VDPA diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile index b347c62032ea..89a5bededc9f 100644 --- a/drivers/vdpa/mlx5/Makefile +++ b/drivers/vdpa/mlx5/Makefile @@ -1 +1,4 @@ -obj-$(CONFIG_MLX5_VDPA) += core/resources.o core/mr.o +subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core + +obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o +mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/main.o net/mlx5_vnet.o core/resources.o core/mr.o diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 084698975c47..f5dec0274133 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -464,7 +464,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io bool *change_map) { struct mlx5_vdpa_mr *mr = &mvdev->mr; - int err; + int err = 0; *change_map = false; if (map_empty(iotlb)) { diff --git a/drivers/vdpa/mlx5/net/main.c b/drivers/vdpa/mlx5/net/main.c new file mode 100644 index 000000000000..838cd98386ff --- /dev/null +++ b/drivers/vdpa/mlx5/net/main.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#include +#include +#include +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vnet.h" + +MODULE_AUTHOR("Eli Cohen "); +MODULE_DESCRIPTION("Mellanox VDPA driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +static bool required_caps_supported(struct mlx5_core_dev *mdev) +{ + u8 event_mode; + u64 got; + + got = MLX5_CAP_GEN_64(mdev, general_obj_types); + + if (!(got & MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q)) + return false; + + event_mode = MLX5_CAP_DEV_VDPA_EMULATION(mdev, event_mode); + if (!(event_mode & MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE)) + return false; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, eth_frame_offload_type)) + return false; + + return true; +} + +static void *mlx5_vdpa_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_vdpa_dev *vdev; + + if (mlx5_core_is_pf(mdev)) + return NULL; + + if (!required_caps_supported(mdev)) { + dev_info(mdev->device, "virtio net emulation not supported\n"); + return NULL; + } + vdev = mlx5_vdpa_add_dev(mdev); + if (IS_ERR(vdev)) + return NULL; + + return vdev; +} + +static void mlx5_vdpa_remove(struct mlx5_core_dev *mdev, void *context) +{ + struct mlx5_vdpa_dev *vdev = context; + + mlx5_vdpa_remove_dev(vdev); +} + +static struct mlx5_interface mlx5_vdpa_interface = { + .add = mlx5_vdpa_add, + .remove = mlx5_vdpa_remove, + .protocol = MLX5_INTERFACE_PROTOCOL_VDPA, +}; + +static int __init mlx5_vdpa_init(void) +{ + return mlx5_register_interface(&mlx5_vdpa_interface); +} + +static void __exit mlx5_vdpa_exit(void) +{ + mlx5_unregister_interface(&mlx5_vdpa_interface); +} + +module_init(mlx5_vdpa_init); +module_exit(mlx5_vdpa_exit); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c new file mode 100644 index 000000000000..3ec44a4f0e45 --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -0,0 +1,1965 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mlx5_vnet.h" +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vdpa.h" + +#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) + +#define VALID_FEATURES_MASK \ + (BIT(VIRTIO_NET_F_CSUM) | BIT(VIRTIO_NET_F_GUEST_CSUM) | \ + BIT(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT(VIRTIO_NET_F_MTU) | BIT(VIRTIO_NET_F_MAC) | \ + BIT(VIRTIO_NET_F_GUEST_TSO4) | BIT(VIRTIO_NET_F_GUEST_TSO6) | \ + BIT(VIRTIO_NET_F_GUEST_ECN) | BIT(VIRTIO_NET_F_GUEST_UFO) | BIT(VIRTIO_NET_F_HOST_TSO4) | \ + BIT(VIRTIO_NET_F_HOST_TSO6) | BIT(VIRTIO_NET_F_HOST_ECN) | BIT(VIRTIO_NET_F_HOST_UFO) | \ + BIT(VIRTIO_NET_F_MRG_RXBUF) | BIT(VIRTIO_NET_F_STATUS) | BIT(VIRTIO_NET_F_CTRL_VQ) | \ + BIT(VIRTIO_NET_F_CTRL_RX) | BIT(VIRTIO_NET_F_CTRL_VLAN) | \ + BIT(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + BIT(VIRTIO_NET_F_MQ) | BIT(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT(VIRTIO_NET_F_HASH_REPORT) | \ + BIT(VIRTIO_NET_F_RSS) | BIT(VIRTIO_NET_F_RSC_EXT) | BIT(VIRTIO_NET_F_STANDBY) | \ + BIT(VIRTIO_NET_F_SPEED_DUPLEX) | BIT(VIRTIO_F_NOTIFY_ON_EMPTY) | \ + BIT(VIRTIO_F_ANY_LAYOUT) | BIT(VIRTIO_F_VERSION_1) | BIT(VIRTIO_F_ACCESS_PLATFORM) | \ + BIT(VIRTIO_F_RING_PACKED) | BIT(VIRTIO_F_ORDER_PLATFORM) | BIT(VIRTIO_F_SR_IOV)) + +#define VALID_STATUS_MASK \ + (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ + VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) + +struct mlx5_vdpa_net_resources { + u32 tisn; + u32 tdn; + u32 tirn; + u32 rqtn; + bool valid; +}; + +struct mlx5_vdpa_cq_buf { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int cqe_size; + int nent; +}; + +struct mlx5_vdpa_cq { + struct mlx5_core_cq mcq; + struct mlx5_vdpa_cq_buf buf; + struct mlx5_db db; + int cqe; +}; + +struct mlx5_vdpa_umem { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int size; + u32 id; +}; + 
+struct mlx5_vdpa_qp { + struct mlx5_core_qp mqp; + struct mlx5_frag_buf frag_buf; + struct mlx5_db db; + u16 head; + bool fw; +}; + +struct mlx5_vq_restore_info { + u32 num_ent; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u16 avail_index; + bool ready; + struct vdpa_callback cb; + bool restore; +}; + +struct mlx5_vdpa_virtqueue { + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num_ent; + struct vdpa_callback event_cb; + + /* Resources for implementing the notification channel from the device + * to the driver. fwqp is the firmware end of an RC connection; the + * other end is vqqp used by the driver. cq is where completions are + * reported. + */ + struct mlx5_vdpa_cq cq; + struct mlx5_vdpa_qp fwqp; + struct mlx5_vdpa_qp vqqp; + + /* umem resources are required for the virtqueue operation. Their use + * is internal and they must be provided by the driver. + */ + struct mlx5_vdpa_umem umem1; + struct mlx5_vdpa_umem umem2; + struct mlx5_vdpa_umem umem3; + + bool initialized; + int index; + u32 virtq_id; + struct mlx5_vdpa_net *ndev; + u16 avail_idx; + int fw_state; + + /* keep last in the struct */ + struct mlx5_vq_restore_info ri; +}; + +/* We will remove this limitation once mlx5_vdpa_alloc_resources() + * provides for driver space allocation + */ +#define MLX5_MAX_SUPPORTED_VQS 16 + +struct mlx5_vdpa_net { + struct mlx5_vdpa_dev mvdev; + struct mlx5_vdpa_net_resources res; + struct virtio_net_config config; + struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; + + /* Serialize vq resources creation and destruction. This is required + * since memory map might change and we need to destroy and create + * resources while the driver is operational. 
+ */ + struct mutex reslock; + struct mlx5_flow_table *rxft; + struct mlx5_fc *rx_counter; + struct mlx5_flow_handle *rx_rule; + bool setup; +}; + +static void free_resources(struct mlx5_vdpa_net *ndev); +static void init_mvqs(struct mlx5_vdpa_net *ndev); +static int setup_driver(struct mlx5_vdpa_net *ndev); +static void teardown_driver(struct mlx5_vdpa_net *ndev); + +static bool mlx5_vdpa_debug; + +#define MLX5_LOG_VIO_FLAG(_feature) \ + do { \ + if (features & BIT(_feature)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_feature); \ + } while (0) + +#define MLX5_LOG_VIO_STAT(_status) \ + do { \ + if (status & (_status)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_status); \ + } while (0) + +static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) +{ + if (status & ~VALID_STATUS_MASK) + mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", + status & ~VALID_STATUS_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); + if (set && !status) { + mlx5_vdpa_info(mvdev, "driver resets the device\n"); + return; + } + + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); +} + +static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) +{ + if (features & ~VALID_FEATURES_MASK) + mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", + features & ~VALID_FEATURES_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? 
"sets" : "reads"); + if (!features) + mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); + + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); + MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); + MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); +} + +static int create_tis(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + int err; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); + err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); + if (err) + mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", 
err); + + return err; +} + +static void destroy_tis(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); +} + +#define MLX5_VDPA_CQE_SIZE 64 +#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) + +static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) +{ + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; + u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; + int err; + + err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, + ndev->mvdev.mdev->priv.numa_node); + if (err) + return err; + + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + + buf->cqe_size = MLX5_VDPA_CQE_SIZE; + buf->nent = nent; + + return 0; +} + +static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) +{ + struct mlx5_frag_buf *frag_buf = &umem->frag_buf; + + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); +} + +static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) +{ + return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); +} + +static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) +{ + struct mlx5_cqe64 *cqe64; + void *cqe; + int i; + + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe(vcq, i); + cqe64 = cqe; + cqe64->op_own = MLX5_CQE_INVALID << 4; + } +} + +static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) +{ + struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); + + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) + return cqe64; + + return NULL; +} + +static void rx_post(struct mlx5_vdpa_qp *vqp, int n) +{ + vqp->head += n; + vqp->db.db[0] = cpu_to_be32(vqp->head); +} + +static 
void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, + struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) +{ + struct mlx5_vdpa_qp *vqp; + __be64 *pas; + void *qpc; + + vqp = fw ? &mvq->fwqp : &mvq->vqqp; + MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + if (vqp->fw) { + /* Firmware QP is allocated by the driver for the firmware's + * use so we can skip part of the params as they will be chosen by firmware + */ + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); + MLX5_SET(qpc, qpc, no_sq, 1); + return; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, no_sq, 1); + MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); + mlx5_fill_page_frag_array(&vqp->frag_buf, pas); +} + +static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) +{ + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, + num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); +} + +static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_vdpa_qp *vqp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + void *qpc; + void *in; + int err; + + if (!vqp->fw) { + vqp = &mvq->vqqp; + err = rq_buf_alloc(ndev, 
vqp, mvq->num_ent); + if (err) + return err; + + err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); + if (err) + goto err_db; + inlen += vqp->frag_buf.npages * sizeof(__be64); + } + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_kzalloc; + } + + qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + if (!vqp->fw) + MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (err) + goto err_kzalloc; + + vqp->mqp.uid = ndev->mvdev.res.uid; + vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); + + if (!vqp->fw) + rx_post(vqp, mvq->num_ent); + + return 0; + +err_kzalloc: + if (!vqp->fw) + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); +err_db: + if (!vqp->fw) + rq_buf_free(ndev, vqp); + + return err; +} + +static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); + MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); + if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) + mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); + if (!vqp->fw) { + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); + rq_buf_free(ndev, vqp); + } +} + +static void *next_cqe_sw(struct mlx5_vdpa_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) +{ + struct mlx5_cqe64 *cqe64; + + cqe64 = next_cqe_sw(vcq); + if (!cqe64) + return -EAGAIN; + + vcq->mcq.cons_index++; + return 0; +} + +static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) +{ + 
mlx5_cq_set_ci(&mvq->cq.mcq); + rx_post(&mvq->vqqp, num); + if (mvq->event_cb.callback) + mvq->event_cb.callback(mvq->event_cb.private); +} + +static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq); + struct mlx5_vdpa_net *ndev = mvq->ndev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + int num = 0; + + while (!mlx5_vdpa_poll_one(&mvq->cq)) { + num++; + if (num > mvq->num_ent / 2) { + /* If completions keep coming while we poll, we want to + * let the hardware know that we consumed them by + * updating the doorbell record. We also let vdpa core + * know about this so it passes it on the virtio driver + * on the guest. + */ + mlx5_vdpa_handle_completions(mvq, num); + num = 0; + } + } + + if (num) + mlx5_vdpa_handle_completions(mvq, num); + + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); +} + +static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + unsigned int irqn; + __be64 *pas; + int inlen; + void *cqc; + void *in; + int err; + int eqn; + + err = mlx5_db_alloc(mdev, &vcq->db); + if (err) + return err; + + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + vcq->mcq.cqe_sz = 64; + + err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); + if (err) + goto err_db; + + cq_frag_buf_init(vcq, &vcq->buf); + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_vzalloc; + } + + MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + 
mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + /* Use vector 0 by default. Consider adding code to choose least used + * vector. + */ + err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + if (err) + goto err_vec; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); + MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); + + err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); + if (err) + goto err_vec; + + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); + kfree(in); + return 0; + +err_vec: + kfree(in); +err_vzalloc: + cq_frag_buf_free(ndev, &vcq->buf); +err_db: + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); + return err; +} + +static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + + if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); + return; + } + cq_frag_buf_free(ndev, &vcq->buf); + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); +} + +static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, + struct mlx5_vdpa_umem **umemp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int p_a; + int p_b; + + switch (num) { + case 1: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); + *umemp = &mvq->umem1; + break; + case 2: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); + p_b = 
MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
+		*umemp = &mvq->umem2;
+		break;
+	case 3:
+		p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
+		p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
+		*umemp = &mvq->umem3;
+		break;
+	default:
+		/* Only umems 1..3 exist. Without this, p_a, p_b and *umemp
+		 * would be used uninitialized; create_umem() already treats a
+		 * negative size as an error.
+		 */
+		return -EINVAL;
+	}
+	return p_a * mvq->num_ent + p_b;
+}
+
+/* Release the fragmented buffer backing @umem. */
+static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
+{
+	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
+}
+
+/* Allocate the buffer for umem number @num (1..3) of @mvq and register it
+ * with the device through CREATE_UMEM. On success the id returned by the
+ * device is stored in umem->id. Returns 0 or a negative error; the buffer is
+ * freed again on every failure path.
+ */
+static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
+{
+	int inlen;
+	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
+	void *um;
+	void *in;
+	int err;
+	__be64 *pas;
+	int size;
+	struct mlx5_vdpa_umem *umem;
+
+	size = umem_size(ndev, mvq, num, &umem);
+	if (size < 0)
+		return size;
+
+	umem->size = size;
+	err = umem_frag_buf_alloc(ndev, umem, size);
+	if (err)
+		return err;
+
+	/* The command carries one MTT entry per page of the buffer. */
+	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
+
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_in;
+	}
+
+	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
+	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
+	um = MLX5_ADDR_OF(create_umem_in, in, umem);
+	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
+
+	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
+	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
+
+	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
+	if (err) {
+		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
+		goto err_cmd;
+	}
+
+	kfree(in);
+	umem->id = MLX5_GET(create_umem_out, out, umem_id);
+
+	return 0;
+
+err_cmd:
+	kfree(in);
+err_in:
+	umem_frag_buf_free(ndev, umem);
+	return err;
+}
+
+static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
+	u32
out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
+	struct mlx5_vdpa_umem *umem;
+
+	switch (num) {
+	case 1:
+		umem = &mvq->umem1;
+		break;
+	case 2:
+		umem = &mvq->umem2;
+		break;
+	case 3:
+		umem = &mvq->umem3;
+		break;
+	default:
+		/* Only umems 1..3 exist; never dereference an unset pointer. */
+		return;
+	}
+
+	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
+	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
+	/* If the destroy command fails the buffer is deliberately not freed. */
+	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
+		return;
+
+	umem_frag_buf_free(ndev, umem);
+}
+
+/* Create umems 1..3 of @mvq; on failure destroy the ones already created. */
+static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	int num;
+	int err;
+
+	for (num = 1; num <= 3; num++) {
+		err = create_umem(ndev, mvq, num);
+		if (err)
+			goto err_umem;
+	}
+	return 0;
+
+err_umem:
+	for (num--; num > 0; num--)
+		umem_destroy(ndev, mvq, num);
+
+	return err;
+}
+
+/* Destroy umems 3..1 of @mvq, in reverse order of creation. */
+static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	int num;
+
+	for (num = 3; num > 0; num--)
+		umem_destroy(ndev, mvq, num);
+}
+
+/* Pick the virtqueue layout to request from the device. Split is used
+ * whenever the device reports it; packed is only the fallback. Warns if the
+ * device reports neither.
+ */
+static int get_queue_type(struct mlx5_vdpa_net *ndev)
+{
+	u32 type_mask;
+
+	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
+
+	/* prefer split queue */
+	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
+		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
+
+	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
+
+	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
+}
+
+/* Odd queue indices are treated as TX queues. */
+static bool vq_is_tx(u16 idx)
+{
+	return idx % 2;
+}
+
+/* Pack the offload feature bits into the value written to
+ * queue_feature_bit_mask_12_3: HOST_TSO4 -> bit 9, HOST_TSO6 -> bit 8,
+ * CSUM -> bit 7, GUEST_CSUM -> bit 6.
+ */
+static u16 get_features_12_3(u64 features)
+{
+	return (!!(features & BIT(VIRTIO_NET_F_HOST_TSO4)) << 9) |
+	       (!!(features & BIT(VIRTIO_NET_F_HOST_TSO6)) << 8) |
+	       (!!(features & BIT(VIRTIO_NET_F_CSUM)) << 7) |
+	       (!!(features & BIT(VIRTIO_NET_F_GUEST_CSUM)) << 6);
+}
+
+/* Create the VIRTIO_NET_Q general object for @mvq, together with its three
+ * umems. On success the object id is stored in mvq->virtq_id. Returns 0 or a
+ * negative error; the umems are destroyed again on failure.
+ */
+static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
+	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
+	void *obj_context;
+	void *cmd_hdr;
+	void *vq_ctx;
+	void *in;
+	int err;
+
+	err = umems_create(ndev, mvq);
+	if (err)
+		return err;
+
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in) {
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+
+	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
+
+	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
+	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
+	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
+		 get_features_12_3(ndev->mvdev.actual_features));
+	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
+	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
+
+	if (vq_is_tx(mvq->index))
+		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
+
+	MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
+	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
+	MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
+	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
+	/* VIRTIO_F_VERSION_1 is a bit number, not a mask. */
+	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
+		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
+	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
+	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
+	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
+	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
+	/* Each umem is programmed with its own id and its own size. */
+	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
+	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
+	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
+	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
+	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
+	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
+	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
+	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
+		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
+
+	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
+	if (err)
+		goto err_cmd;
+
+	kfree(in);
+	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+
+err_cmd:
+	kfree(in);
+err_alloc:
+	umems_destroy(ndev, mvq);
+	return err;
+}
+
+/* Destroy the VIRTIO_NET_Q object of @mvq and then its umems. If the destroy
+ * command fails, a warning is logged and the umems are left in place.
+ */
+static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
+
+	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
+	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
+	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
+		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
+	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
+		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
+		return;
+	}
+	umems_destroy(ndev, mvq);
+}
+
+/* QP number of the peer: the driver QP when @fw is set, else the firmware QP. */
+static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
+{
+	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
+}
+
+static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
+{
+	return fw ?
mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
+}
+
+/* Allocate and fill the command mailboxes for the QP state transition @cmd.
+ *
+ * On success *in / *out point to zeroed buffers of *inlen / *outlen bytes,
+ * with *in populated for @qpn (and @rqpn where the transition needs it); the
+ * caller releases them with free_inout(). On allocation failure or an
+ * unsupported @cmd, both *in and *out are set to NULL.
+ */
+static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
+			int *outlen, u32 qpn, u32 rqpn)
+{
+	void *qpc;
+	void *pp;
+
+	switch (cmd) {
+	case MLX5_CMD_OP_2RST_QP:
+		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
+		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
+		*in = kzalloc(*inlen, GFP_KERNEL);
+		*out = kzalloc(*outlen, GFP_KERNEL);
+		/* Test the buffers, not the out-parameters that hold them. */
+		if (!*in || !*out)
+			goto outerr;
+
+		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
+		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
+		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
+		break;
+	case MLX5_CMD_OP_RST2INIT_QP:
+		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
+		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
+		*in = kzalloc(*inlen, GFP_KERNEL);
+		*out = kzalloc(*outlen, GFP_KERNEL);
+		if (!*in || !*out)
+			goto outerr;
+
+		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
+		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
+		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
+		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
+		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
+		MLX5_SET(qpc, qpc, rwe, 1);
+		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+		MLX5_SET(ads, pp, vhca_port_num, 1);
+		break;
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
+		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
+		*in = kzalloc(*inlen, GFP_KERNEL);
+		*out = kzalloc(*outlen, GFP_KERNEL);
+		if (!*in || !*out)
+			goto outerr;
+
+		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
+		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
+		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
+		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
+		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
+		MLX5_SET(qpc, qpc, log_msg_max, 30);
+		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
+		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+		MLX5_SET(ads, pp, fl, 1);
+		break;
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
+		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
+		*in = kzalloc(*inlen, GFP_KERNEL);
+		*out = kzalloc(*outlen, GFP_KERNEL);
+		if (!*in || !*out)
+			goto outerr;
+
+		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
+		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
+		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
+		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
+		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+		MLX5_SET(ads, pp, ack_timeout, 14);
+		MLX5_SET(qpc, qpc, retry_count, 7);
+		MLX5_SET(qpc, qpc, rnr_retry, 7);
+		break;
+	default:
+		/* Nothing was allocated: the caller's pointers may be
+		 * uninitialized, so they must not be passed to kfree().
+		 */
+		goto outerr_nullify;
+	}
+
+	return;
+
+outerr:
+	kfree(*in);
+	kfree(*out);
+outerr_nullify:
+	*in = NULL;
+	*out = NULL;
+}
+
+/* Release the mailboxes obtained from alloc_inout(). */
+static void free_inout(void *in, void *out)
+{
+	kfree(in);
+	kfree(out);
+}
+
+/* Two QPs are used by each virtqueue. One is used by the driver and one by
+ * firmware. The fw argument indicates whether the subjected QP is the one used
+ * by firmware.
+ */
+static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
+{
+	int outlen;
+	int inlen;
+	void *out;
+	void *in;
+	int err;
+
+	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
+	if (!in || !out)
+		return -ENOMEM;
+
+	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
+	free_inout(in, out);
+	return err;
+}
+
+/* Step both QPs of @mvq through RST -> INIT -> RTR (firmware QP first at
+ * every step), then move only the firmware QP on to RTS. Stops at the first
+ * failing transition and returns its error.
+ */
+static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	int err;
+
+	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
+	if (err)
+		return err;
+
+	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
+	if (err)
+		return err;
+
+	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
+	if (err)
+		return err;
+
+	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
+	if (err)
+		return err;
+
+	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
+	if (err)
+		return err;
+
+	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
+	if (err)
+		return err;
+
+	return
modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); +} + +struct mlx5_virtq_attr { + u8 state; + u16 available_index; +}; + +static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtq_attr *attr) +{ + int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); + u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; + void *out; + void *obj_context; + void *cmd_hdr; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); + if (err) + goto err_cmd; + + obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); + memset(attr, 0, sizeof(*attr)); + attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); + attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); + kfree(out); + return 0; + +err_cmd: + kfree(out); + return err; +} + +static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + 
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
+
+	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
+	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
+		   MLX5_VIRTQ_MODIFY_MASK_STATE);
+	MLX5_SET(virtio_net_q_object, obj_context, state, state);
+	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
+	kfree(in);
+	/* Remember the new state only when the command succeeded. */
+	if (!err)
+		mvq->fw_state = state;
+
+	return err;
+}
+
+/* Create every hardware object backing @mvq: the CQ, the firmware and driver
+ * QPs (then connected to each other) and the virtqueue object, which is moved
+ * to RDY when mvq->ready is set. A queue with num_ent == 0 is skipped and 0 is
+ * returned. On failure everything created so far is destroyed again.
+ */
+static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	u16 idx = mvq->index;
+	int err;
+
+	if (!mvq->num_ent)
+		return 0;
+
+	if (mvq->initialized) {
+		mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
+		return -EINVAL;
+	}
+
+	err = cq_create(ndev, idx, mvq->num_ent);
+	if (err)
+		return err;
+
+	err = qp_create(ndev, mvq, &mvq->fwqp);
+	if (err)
+		goto err_fwqp;
+
+	err = qp_create(ndev, mvq, &mvq->vqqp);
+	if (err)
+		goto err_vqqp;
+
+	err = connect_qps(ndev, mvq);
+	if (err)
+		goto err_connect;
+
+	err = create_virtqueue(ndev, mvq);
+	if (err)
+		goto err_connect;
+
+	if (mvq->ready) {
+		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+		if (err) {
+			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
+				       idx, err);
+			/* The virtqueue object and its umems exist by now. */
+			goto err_vq;
+		}
+	}
+
+	mvq->initialized = true;
+	return 0;
+
+err_vq:
+	destroy_virtqueue(ndev, mvq);
+err_connect:
+	qp_destroy(ndev, &mvq->vqqp);
+err_vqqp:
+	qp_destroy(ndev, &mvq->fwqp);
+err_fwqp:
+	cq_destroy(ndev, idx);
+	return err;
+}
+
+/* Move an initialized virtqueue that is in RDY state to SUSPEND. Returns
+ * without suspending if the queue is not initialized, if querying it fails,
+ * or if its recorded firmware state is not RDY.
+ */
+static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	struct mlx5_virtq_attr attr;
+
+	if (!mvq->initialized)
+		return;
+
+	if (query_virtqueue(ndev, mvq, &attr)) {
+		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
+		return;
+	}
+	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
+		return;
+
+	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
+		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
+}
+
+static void suspend_vqs(struct
mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++) + suspend_vq(ndev, &ndev->vqs[i]); +} + +static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + if (!mvq->initialized) + return; + + suspend_vq(ndev, mvq); + destroy_virtqueue(ndev, mvq); + qp_destroy(ndev, &mvq->vqqp); + qp_destroy(ndev, &mvq->fwqp); + cq_destroy(ndev, mvq->index); + mvq->initialized = false; +} + +static int create_rqt(struct mlx5_vdpa_net *ndev) +{ + int log_max_rqt; + __be32 *list; + void *rqtc; + int inlen; + void *in; + int i, j; + int err; + + log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); + if (log_max_rqt < 1) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); + MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt); + MLX5_SET(rqtc, rqtc, rqt_actual_size, 1); + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); + for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) { + if (!ndev->vqs[j].initialized) + continue; + + if (!vq_is_tx(ndev->vqs[j].index)) { + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id); + i++; + } + } + + err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); + kfree(in); + if (err) + return err; + + return 0; +} + +static void destroy_rqt(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); +} + +static int create_tir(struct mlx5_vdpa_net *ndev) +{ +#define HASH_IP_L4PORTS \ + (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ + MLX5_HASH_FIELD_SEL_L4_DPORT) + static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, + 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 
0x94, + 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, + 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, + 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; + void *rss_key; + void *outer; + void *tirc; + void *in; + int err; + + in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); + rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); + + outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); + + MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); + MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); + + err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); + kfree(in); + return err; +} + +static void destroy_tir(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); +} + +static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + int err; + + /* for now, one entry, match all, forward to tir */ + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + + ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) { + mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n"); + return -EOPNOTSUPP; + } + + ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ndev->rxft)) + return 
PTR_ERR(ndev->rxft); + + ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); + if (IS_ERR(ndev->rx_counter)) { + err = PTR_ERR(ndev->rx_counter); + goto err_fc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[0].tir_num = ndev->res.tirn; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); + ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2); + if (IS_ERR(ndev->rx_rule)) { + err = PTR_ERR(ndev->rx_rule); + ndev->rx_rule = NULL; + goto err_rule; + } + + return 0; + +err_rule: + mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); +err_fc: + mlx5_destroy_flow_table(ndev->rxft); + return err; +} + +static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) +{ + if (!ndev->rx_rule) + return; + + mlx5_del_flow_rules(ndev->rx_rule); + mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); + mlx5_destroy_flow_table(ndev->rxft); + + ndev->rx_rule = NULL; +} + +static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (unlikely(!mvq->ready)) + return; + + iowrite16(idx, ndev->mvdev.res.kick_addr); +} + +static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, + u64 driver_area, u64 device_area) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + mvq->desc_addr = desc_area; + mvq->device_addr = device_area; + mvq->driver_addr = driver_area; + return 0; +} + +static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + + mvq = 
&ndev->vqs[idx]; + mvq->num_ent = num; +} + +static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx]; + + vq->event_cb = *cb; +} + +static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (!ready) + suspend_vq(ndev, mvq); + + mvq->ready = ready; +} + +static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + return mvq->ready; +} + +static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, + const struct vdpa_vq_state *state) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { + mlx5_vdpa_warn(mvdev, "can't modify available index\n"); + return -EINVAL; + } + + mvq->avail_idx = state->avail_index; + return 0; +} + +static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_virtq_attr attr; + int err; + + if (!mvq->initialized) + return -EAGAIN; + + err = query_virtqueue(ndev, mvq, &attr); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); + return err; + } + state->avail_index = attr.available_index; + return 0; +} + +static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) +{ + return PAGE_SIZE; +} 
+
+/* Bits of the device_features_bits_mask capability and the offload each one
+ * advertises.
+ */
+enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
+	MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
+	MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
+	MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
+};
+
+/* Translate the device capability mask @dev_features into the matching
+ * virtio-net feature bits.
+ */
+static u64 mlx_to_vritio_features(u16 dev_features)
+{
+	u64 result = 0;
+
+	if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
+		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
+	if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
+		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
+	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
+		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
+	if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
+		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
+
+	return result;
+}
+
+/* Compute the feature set offered to the driver, cache it in
+ * mvdev.mlx_features and return it. VIRTIO_F_ACCESS_PLATFORM is always
+ * offered; VIRTIO_F_VERSION_1 only when the device reports virtio_version_1_0.
+ */
+static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	u16 dev_features;
+
+	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
+	ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
+	/* These bit numbers are >= 32: BIT() is unsigned long and would
+	 * overflow on 32-bit builds, so build the u64 masks with BIT_ULL().
+	 */
+	if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
+		ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
+	ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
+	print_features(mvdev, ndev->mvdev.mlx_features, false);
+	return ndev->mvdev.mlx_features;
+}
+
+/* Reject feature sets that lack VIRTIO_F_ACCESS_PLATFORM. */
+static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
+{
+	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/* Set up every virtqueue; on failure tear down the ones already set up. */
+static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
+		err = setup_vq(ndev, &ndev->vqs[i]);
+		if (err)
+			goto err_vq;
+	}
+
+	return 0;
+
+err_vq:
+	for (--i; i >= 0; i--)
+		teardown_vq(ndev, &ndev->vqs[i]);
+
+	return err;
+}
+
+static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
+{
+	struct mlx5_vdpa_virtqueue *mvq;
+	int i;
+
+	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
+		mvq = &ndev->vqs[i];
+		if (!mvq->initialized)
+			continue;
+
+		teardown_vq(ndev,
mvq); + } +} + +static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + print_features(mvdev, features, true); + + err = verify_min_features(mvdev, features); + if (err) + return err; + + ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; + return err; +} + +static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) +{ + /* not implemented */ + mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n"); +} + +#define MLX5_VDPA_MAX_VQ_ENTRIES 256 +static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) +{ + return MLX5_VDPA_MAX_VQ_ENTRIES; +} + +static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) +{ + return VIRTIO_ID_NET; +} + +static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) +{ + return PCI_VENDOR_ID_MELLANOX; +} + +static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + print_status(mvdev, ndev->mvdev.status, false); + return ndev->mvdev.status; +} + +static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vq_restore_info *ri = &mvq->ri; + struct mlx5_virtq_attr attr; + int err; + + if (!mvq->initialized) + return 0; + + err = query_virtqueue(ndev, mvq, &attr); + if (err) + return err; + + ri->avail_index = attr.available_index; + ri->ready = mvq->ready; + ri->num_ent = mvq->num_ent; + ri->desc_addr = mvq->desc_addr; + ri->device_addr = mvq->device_addr; + ri->driver_addr = mvq->driver_addr; + ri->cb = mvq->event_cb; + ri->restore = true; + return 0; +} + +static int save_channels_info(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); + save_channel_info(ndev, &ndev->vqs[i]); + } + return 
0; +} + +static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) + memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); +} + +static void restore_channels_info(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_vq_restore_info *ri; + int i; + + mlx5_clear_vqs(ndev); + init_mvqs(ndev); + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + mvq = &ndev->vqs[i]; + ri = &mvq->ri; + if (!ri->restore) + continue; + + mvq->avail_idx = ri->avail_index; + mvq->ready = ri->ready; + mvq->num_ent = ri->num_ent; + mvq->desc_addr = ri->desc_addr; + mvq->device_addr = ri->device_addr; + mvq->driver_addr = ri->driver_addr; + mvq->event_cb = ri->cb; + } +} + +static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb) +{ + int err; + + suspend_vqs(ndev); + err = save_channels_info(ndev); + if (err) + goto err_mr; + + teardown_driver(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb); + if (err) + goto err_mr; + + restore_channels_info(ndev); + err = setup_driver(ndev); + if (err) + goto err_setup; + + return 0; + +err_setup: + mlx5_vdpa_destroy_mr(&ndev->mvdev); +err_mr: + return err; +} + +static int setup_driver(struct mlx5_vdpa_net *ndev) +{ + int err; + + mutex_lock(&ndev->reslock); + if (ndev->setup) { + mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n"); + err = 0; + goto out; + } + err = setup_virtqueues(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n"); + goto out; + } + + err = create_rqt(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n"); + goto err_rqt; + } + + err = create_tir(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n"); + goto err_tir; + } + + err = add_fwd_to_tir(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n"); + goto err_fwd; + } + ndev->setup = true; + mutex_unlock(&ndev->reslock); + + return 
0; + +err_fwd: + destroy_tir(ndev); +err_tir: + destroy_rqt(ndev); +err_rqt: + teardown_virtqueues(ndev); +out: + mutex_unlock(&ndev->reslock); + return err; +} + +static void teardown_driver(struct mlx5_vdpa_net *ndev) +{ + mutex_lock(&ndev->reslock); + if (!ndev->setup) + goto out; + + remove_fwd_to_tir(ndev); + destroy_tir(ndev); + destroy_rqt(ndev); + teardown_virtqueues(ndev); + ndev->setup = false; +out: + mutex_unlock(&ndev->reslock); +} + +static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + print_status(mvdev, status, true); + if (!status) { + mlx5_vdpa_info(mvdev, "performing device reset\n"); + teardown_driver(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status = 0; + ndev->mvdev.mlx_features = 0; + ++mvdev->generation; + return; + } + + if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_driver(ndev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); + goto err_setup; + } + } else { + mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); + return; + } + } + + ndev->mvdev.status = status; + return; + +err_setup: + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; +} + +static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, + unsigned int len) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + if (offset + len < sizeof(struct virtio_net_config)) + memcpy(buf, &ndev->config + offset, len); +} + +static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, + unsigned int len) +{ + /* not supported */ +} + +static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + + return 
mvdev->generation; +} + +static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool change_map; + int err; + + err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); + if (err) { + mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); + return err; + } + + if (change_map) + return mlx5_vdpa_change_map(ndev, iotlb); + + return 0; +} + +static void mlx5_vdpa_free(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev; + + ndev = to_mlx5_vdpa_ndev(mvdev); + + free_resources(ndev); + mlx5_vdpa_free_resources(&ndev->mvdev); + mutex_destroy(&ndev->reslock); +} + +static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) +{ + struct vdpa_notification_area ret = {}; + + return ret; +} + +static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) +{ + return -EOPNOTSUPP; +} + +static const struct vdpa_config_ops mlx5_vdpa_ops = { + .set_vq_address = mlx5_vdpa_set_vq_address, + .set_vq_num = mlx5_vdpa_set_vq_num, + .kick_vq = mlx5_vdpa_kick_vq, + .set_vq_cb = mlx5_vdpa_set_vq_cb, + .set_vq_ready = mlx5_vdpa_set_vq_ready, + .get_vq_ready = mlx5_vdpa_get_vq_ready, + .set_vq_state = mlx5_vdpa_set_vq_state, + .get_vq_state = mlx5_vdpa_get_vq_state, + .get_vq_notification = mlx5_get_vq_notification, + .get_vq_irq = mlx5_get_vq_irq, + .get_vq_align = mlx5_vdpa_get_vq_align, + .get_features = mlx5_vdpa_get_features, + .set_features = mlx5_vdpa_set_features, + .set_config_cb = mlx5_vdpa_set_config_cb, + .get_vq_num_max = mlx5_vdpa_get_vq_num_max, + .get_device_id = mlx5_vdpa_get_device_id, + .get_vendor_id = mlx5_vdpa_get_vendor_id, + .get_status = mlx5_vdpa_get_status, + .set_status = mlx5_vdpa_set_status, + .get_config = mlx5_vdpa_get_config, + .set_config = mlx5_vdpa_set_config, + .get_generation = mlx5_vdpa_get_generation, + .set_map = 
mlx5_vdpa_set_map, + .free = mlx5_vdpa_free, +}; + +static int alloc_resources(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_net_resources *res = &ndev->res; + int err; + + if (res->valid) { + mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); + return -EEXIST; + } + + err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); + if (err) + return err; + + err = create_tis(ndev); + if (err) + goto err_tis; + + res->valid = true; + + return 0; + +err_tis: + mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); + return err; +} + +static void free_resources(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_net_resources *res = &ndev->res; + + if (!res->valid) + return; + + destroy_tis(ndev); + mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); + res->valid = false; +} + +static void init_mvqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + int i; + + for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) { + mvq = &ndev->vqs[i]; + memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); + mvq->index = i; + mvq->ndev = ndev; + mvq->fwqp.fw = true; + } + for (; i < ndev->mvdev.max_vqs; i++) { + mvq = &ndev->vqs[i]; + memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); + mvq->index = i; + mvq->ndev = ndev; + } +} + +void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) +{ + struct virtio_net_config *config; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + u32 max_vqs; + int err; + + /* we save one virtqueue for control virtqueue should we require it */ + max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); + max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); + + ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, + 2 * mlx5_vdpa_max_qps(max_vqs)); + if (IS_ERR(ndev)) + return ndev; + + ndev->mvdev.max_vqs = max_vqs; + mvdev = &ndev->mvdev; + mvdev->mdev = mdev; + init_mvqs(ndev); + mutex_init(&ndev->reslock); + config = 
&ndev->config; + err = mlx5_query_nic_vport_mtu(mdev, &config->mtu); + if (err) + goto err_mtu; + + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); + if (err) + goto err_mtu; + + mvdev->vdev.dma_dev = mdev->device; + err = mlx5_vdpa_alloc_resources(&ndev->mvdev); + if (err) + goto err_mtu; + + err = alloc_resources(ndev); + if (err) + goto err_res; + + err = vdpa_register_device(&mvdev->vdev); + if (err) + goto err_reg; + + return ndev; + +err_reg: + free_resources(ndev); +err_res: + mlx5_vdpa_free_resources(&ndev->mvdev); +err_mtu: + mutex_destroy(&ndev->reslock); + put_device(&mvdev->vdev.dev); + return ERR_PTR(err); +} + +void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev) +{ + vdpa_unregister_device(&mvdev->vdev); +} diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h new file mode 100644 index 000000000000..f2d6d68b020e --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VNET_H_ +#define __MLX5_VNET_H_ + +#include +#include +#include +#include +#include +#include +#include "mlx5_vdpa.h" + +static inline u32 mlx5_vdpa_max_qps(int max_vqs) +{ + return max_vqs / 2; +} + +#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct mlx5_vdpa_net, mvdev) +void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev); +void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev); + +#endif /* __MLX5_VNET_H_ */ From c84f91e2622235bb742f9f20b8675cf095157026 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 5 Aug 2020 19:55:50 -0400 Subject: [PATCH 65/71] virtio_config: fix up warnings on parisc Apparently, on parisc le16_to_cpu returns an int. virtio_cread_le is very strict about type sizes so it causes a warning. Fix it up by casting to the correct type. Reported-by: kernel test robot Signed-off-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/r/20200805235550.1451637-1-mst@redhat.com --- include/linux/virtio_config.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ecb166c824bb..8fe857e27ef3 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -357,10 +357,10 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) */ #define virtio_le_to_cpu(x) \ _Generic((x), \ - __u8: (x), \ - __le16: le16_to_cpu(x), \ - __le32: le32_to_cpu(x), \ - __le64: le64_to_cpu(x) \ + __u8: (u8)(x), \ + __le16: (u16)le16_to_cpu(x), \ + __le32: (u32)le32_to_cpu(x), \ + __le64: (u64)le64_to_cpu(x) \ ) #define virtio_cpu_to_le(x, m) \ @@ -400,7 +400,6 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val) *(ptr) = virtio_le_to_cpu(virtio_cread_v); \ } while(0) -/* Config space accessors. */ #define virtio_cwrite_le(vdev, structname, member, ptr) \ do { \ typeof(((structname*)0)->member) virtio_cwrite_v = \ From 1e3e792650d2c0df8dd796906275b7c79e278664 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Aug 2020 08:44:43 -0400 Subject: [PATCH 66/71] vdpa_sim: init iommu lock The patch adding the iommu lock did not initialize it. The struct is zero-initialized so this is mostly a problem when using lockdep. Reported-by: kernel test robot Cc: Max Gurtovoy Fixes: 0ea9ee430e74 ("vdpasim: protect concurrent access to iommu iotlb") Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index df3224b138ee..604d9d25ca47 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -358,6 +358,7 @@ static struct vdpasim *vdpasim_create(void) INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); + spin_lock_init(&vdpasim->iommu_lock); dev = &vdpasim->vdpa.dev; dev->coherent_dma_mask = DMA_BIT_MASK(64); From 05acc4beb24c7e5ed3ae20a3d3ab2b29b40cb385 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Thu, 6 Aug 2020 19:56:15 +0100 Subject: [PATCH 67/71] vdpa/mlx5: Fix uninitialised variable in core/mr.c If the kernel is unable to allocate memory for the variable dmr then err will be returned without being set. Set err to -ENOMEM in this case. Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Addresses-Coverity: ("Uninitialized variables") Signed-off-by: Alex Dewar Link: https://lore.kernel.org/r/20200806185625.67344-1-alex.dewar@gmx.co.uk Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang Acked-by: Eli Cohen --- drivers/vdpa/mlx5/core/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index f5dec0274133..ef1c550f8266 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -319,8 +319,10 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 while (size) { sz = (u32)min_t(u64, MAX_KLM_SIZE, size); dmr = kzalloc(sizeof(*dmr), GFP_KERNEL); - if (!dmr) + if (!dmr) { + err = -ENOMEM; goto err_alloc; + } dmr->start = st; dmr->end = st + sz; From f31231bf26a523de8aad4488643a98174c0d6bb2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Aug 2020 17:08:28 +0100 Subject: [PATCH 68/71] vdpa/mlx5: fix memory allocation failure checks The memory allocation failure checking for in and out is currently checking if the pointers are valid rather than the contents of what they point to. Hence the null check on failed memory allocations is incorrect. Fix this by adding the missing indirection in the check. Also for the default case, just set the *in and *out to null as these don't have anything allocated to kfree. Finally remove the redundant *in and *out check as these have been already done on each allocation in the case statement. Addresses-Coverity: ("Null pointer dereference") Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20200806160828.90463-1-colin.king@canonical.com Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang Acked-by: Eli Cohen --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 3ec44a4f0e45..55bc58e1dae9 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -867,7 +867,7 @@ static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inl *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); *in = kzalloc(*inlen, GFP_KERNEL); *out = kzalloc(*outlen, GFP_KERNEL); - if (!in || !out) + if (!*in || !*out) goto outerr; MLX5_SET(qp_2rst_in, *in, opcode, cmd); @@ -879,7 +879,7 @@ static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inl *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); *in = kzalloc(*inlen, GFP_KERNEL); *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); - if (!in || !out) + if (!*in || !*out) goto outerr; MLX5_SET(rst2init_qp_in, *in, opcode, cmd); @@ -896,7 +896,7 @@ static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inl *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); *in = kzalloc(*inlen, GFP_KERNEL); *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); - if (!in || !out) + if (!*in || !*out) goto outerr; MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); @@ -914,7 +914,7 @@ static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inl *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); *in = kzalloc(*inlen, GFP_KERNEL); *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); - if (!in || !out) + if (!*in || !*out) goto outerr; MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); @@ -927,16 +927,15 @@ static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inl MLX5_SET(qpc, qpc, rnr_retry, 7); break; default: - goto outerr; + goto outerr_nullify; } - if (!*in || !*out) - goto outerr; return; outerr: kfree(*in); kfree(*out); +outerr_nullify: *in = NULL; *out = NULL; } 
From 2874211fcdb751e2962d68cdb9783fa1ff8eb340 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 8 Aug 2020 12:32:41 +0300 Subject: [PATCH 69/71] vdpa/mlx5: Fix pointer math in mlx5_vdpa_get_config() There is a pointer math bug here so if "offset" is non-zero then this will copy memory from beyond the end of the array. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200808093241.GB115053@mwanda Signed-off-by: Michael S. Tsirkin Acked-by: Eli Cohen Cc: Jason Wang ; Parav Pandit ; virtualization@lists.linux-foundation.org; linux-kernel@vger.kernel.org; kernel-janitors@vger.kernel.org Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Dan Carpenter --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 55bc58e1dae9..c6b9ec47e51d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1757,7 +1757,7 @@ static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, &ndev->config + offset, len); + memcpy(buf, (u8 *)&ndev->config + offset, len); } static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, From cf16fe9243bfa2863491026fc727618c7c593c84 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 6 Apr 2020 17:45:52 +0300 Subject: [PATCH 70/71] vdpa: Fix pointer math bug in vdpasim_get_config() If "offset" is non-zero then we end up copying from beyond the end of the config because of pointer math. We can fix this by casting the struct to a u8 pointer. 
Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200406144552.GF68494@mwanda Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 604d9d25ca47..62d640327145 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -558,7 +558,7 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, struct vdpasim *vdpasim = vdpa_to_sim(vdpa); if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, &vdpasim->config + offset, len); + memcpy(buf, (u8 *)&vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, From 8a7c3213db068135e816a6a517157de6443290d6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Aug 2020 09:13:10 -0400 Subject: [PATCH 71/71] vdpa/mlx5: fix up endian-ness for mtu VDPA mlx5 accesses config space as native endian - this is wrong since it's a modern device and actually uses LE. It only supports modern guests so we could punt and just force LE, but let's use the full virtio APIs since people tend to copy/paste code, and this is not data path anyway. Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index c6b9ec47e51d..9df69d5efe8c 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -137,6 +137,7 @@ struct mlx5_vdpa_net { struct mlx5_fc *rx_counter; struct mlx5_flow_handle *rx_rule; bool setup; + u16 mtu; }; static void free_resources(struct mlx5_vdpa_net *ndev); @@ -1506,6 +1507,13 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) } } +/* TODO: cross-endian support */ +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) +{ + return virtio_legacy_is_little_endian() || + (mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1)); +} + static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -1519,6 +1527,8 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) return err; ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; + ndev->config.mtu = __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), + ndev->mtu); return err; } @@ -1925,7 +1935,7 @@ void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) init_mvqs(ndev); mutex_init(&ndev->reslock); config = &ndev->config; - err = mlx5_query_nic_vport_mtu(mdev, &config->mtu); + err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu); if (err) goto err_mtu;