From e951747a087a8655f467833bb367ebf53d57527c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jul 2018 20:55:19 -0600 Subject: [PATCH] IB/uverbs: Rework the locking for cleaning up the ucontext The locking here has always been a bit crazy and spread out, upon some careful analysis we can simplify things. Create a single function uverbs_destroy_ufile_hw() that internally handles all locking. This pulls together pieces of this process that were sprinkled all over the places into one place, and covers them with one lock. This eliminates several duplicate/confusing locks and makes the control flow in ib_uverbs_close() and ib_uverbs_free_hw_resources() extremely simple. Unfortunately we have to keep an extra mutex, ucontext_lock. This lock is logically part of the rwsem and provides the 'down write, fail if write locked, wait if read locked' semantic we require. Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 117 +++++++++++++++++++++++--- drivers/infiniband/core/rdma_core.h | 3 +- drivers/infiniband/core/uverbs.h | 6 +- drivers/infiniband/core/uverbs_cmd.c | 6 +- drivers/infiniband/core/uverbs_main.c | 98 +++------------------ include/rdma/ib_verbs.h | 5 ++ 6 files changed, 127 insertions(+), 108 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 4545c661acaa..eeed6374134c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -284,11 +285,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, } ret = uverbs_try_lock_object(uobj, exclusive); - if (ret) { - WARN(uobj->ufile->cleanup_reason, - "ib_uverbs: Trying to lookup_get while cleanup context\n"); + if (ret) goto free; - } return uobj; free: @@ -694,6 +692,71 @@ void uverbs_close_fd(struct file *f) uverbs_uobject_put(uobj); } +static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct ib_device *ib_dev = ibcontext->device; + struct task_struct *owning_process = NULL; + struct mm_struct *owning_mm = NULL; + + owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); + if (!owning_process) + return; + + owning_mm = get_task_mm(owning_process); + if (!owning_mm) { + pr_info("no mm, disassociate ucontext is pending task termination\n"); + while (1) { + put_task_struct(owning_process); + usleep_range(1000, 2000); + owning_process = get_pid_task(ibcontext->tgid, + PIDTYPE_PID); + if (!owning_process || + owning_process->state == TASK_DEAD) { + pr_info("disassociate ucontext done, task was terminated\n"); + /* in case task was dead need to release the + * task struct. + */ + if (owning_process) + put_task_struct(owning_process); + return; + } + } + } + + down_write(&owning_mm->mmap_sem); + ib_dev->disassociate_ucontext(ibcontext); + up_write(&owning_mm->mmap_sem); + mmput(owning_mm); + put_task_struct(owning_process); +} + +/* + * Drop the ucontext off the ufile and completely disconnect it from the + * ib_device + */ +static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) +{ + struct ib_ucontext *ucontext = ufile->ucontext; + int ret; + + if (reason == RDMA_REMOVE_DRIVER_REMOVE) + ufile_disassociate_ucontext(ucontext); + + put_pid(ucontext->tgid); + ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device, + RDMACG_RESOURCE_HCA_HANDLE); + + /* + * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove + * the error return. + */ + ret = ucontext->device->dealloc_ucontext(ufile->ucontext); + WARN_ON(ret); + + ufile->ucontext = NULL; +} + static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, enum rdma_remove_reason reason) { @@ -710,7 +773,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, * We take and release the lock per traversal in order to let * other threads (which might still use the FDs) chance to run. */ - ufile->cleanup_reason = reason; list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) { /* * if we hit this WARN_ON, that means we are @@ -738,18 +800,43 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, return ret; } -void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) +/* + * Destroy the uncontext and every uobject associated with it. If called with + * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has + * been completed and ufile->ucontext is NULL. + * + * This is internally locked and can be called in parallel from multiple + * contexts. + */ +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason) { - enum rdma_remove_reason reason = device_removed ? - RDMA_REMOVE_DRIVER_REMOVE : - RDMA_REMOVE_CLOSE; + if (reason == RDMA_REMOVE_CLOSE) { + /* + * During destruction we might trigger something that + * synchronously calls release on any file descriptor. For + * this reason all paths that come from file_operations + * release must use try_lock. They can progress knowing that + * there is an ongoing uverbs_destroy_ufile_hw that will clean + * up the driver resources. + */ + if (!mutex_trylock(&ufile->ucontext_lock)) + return; + + } else { + mutex_lock(&ufile->ucontext_lock); + } + + down_write(&ufile->hw_destroy_rwsem); /* - * Waits for all remove_commit and alloc_commit to finish. Logically, We - * want to hold this forever as the context is going to be destroyed, - * but we'll release it since it causes a "held lock freed" BUG message. + * If a ucontext was never created then we can't have any uobjects to + * cleanup, nothing to do. */ - down_write(&ufile->hw_destroy_rwsem); + if (!ufile->ucontext) + goto done; + + ufile->ucontext->closing = true; ufile->ucontext->cleanup_retryable = true; while (!list_empty(&ufile->uobjects)) if (__uverbs_cleanup_ufile(ufile, reason)) { @@ -764,7 +851,11 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed) if (!list_empty(&ufile->uobjects)) __uverbs_cleanup_ufile(ufile, reason); + ufile_destroy_ucontext(ufile, reason); + +done: up_write(&ufile->hw_destroy_rwsem); + mutex_unlock(&ufile->ucontext_lock); } const struct uverbs_obj_type_class uverbs_fd_class = { diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index db2339330f6f..a736b46d18e3 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -49,7 +49,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile, const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, uint16_t method); -void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed); +void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, + enum rdma_remove_reason reason); /* * uverbs_uobject_get is called in order to increase the reference count on diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 58b16e840e56..ca9b0450d3f9 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -136,9 +136,9 @@ struct ib_uverbs_completion_event_file { struct ib_uverbs_file { struct kref ref; - struct mutex mutex; - struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; + /* Protects writing to ucontext */ + struct mutex ucontext_lock; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_async_event_file *async_file; @@ -155,8 +155,6 @@ struct ib_uverbs_file { spinlock_t uobjects_lock; struct list_head uobjects; - enum rdma_remove_reason cleanup_reason; - struct idr idr; /* spinlock protects write access to idr */ spinlock_t idr_lock; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 409fd46a2a99..f2611c760184 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -84,7 +84,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - mutex_lock(&file->mutex); + mutex_lock(&file->ucontext_lock); if (file->ucontext) { ret = -EINVAL; @@ -150,7 +150,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, fd_install(resp.async_fd, filp); - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return in_len; @@ -169,7 +169,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); err: - mutex_unlock(&file->mutex); + mutex_unlock(&file->ucontext_lock); return ret; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 77faf32fc997..78d79020ea5c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -41,8 +41,6 @@ #include #include #include -#include -#include #include #include #include @@ -227,21 +225,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp, } } -static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file, - bool device_removed) -{ - struct ib_ucontext *context = file->ucontext; - - context->closing = 1; - uverbs_cleanup_ufile(file, device_removed); - put_pid(context->tgid); - - ib_rdmacg_uncharge(&context->cg_obj, context->device, - RDMACG_RESOURCE_HCA_HANDLE); - - return context->device->dealloc_ucontext(context); -} - static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); @@ -886,8 +869,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) spin_lock_init(&file->idr_lock); idr_init(&file->idr); kref_init(&file->ref); - mutex_init(&file->mutex); - mutex_init(&file->cleanup_mutex); + mutex_init(&file->ucontext_lock); spin_lock_init(&file->uobjects_lock); INIT_LIST_HEAD(&file->uobjects); @@ -917,12 +899,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - mutex_lock(&file->cleanup_mutex); - if (file->ucontext) { - ib_uverbs_cleanup_ufile(file, false); - file->ucontext = NULL; - } - mutex_unlock(&file->cleanup_mutex); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); idr_destroy(&file->idr); mutex_lock(&file->device->lists_mutex); @@ -1109,44 +1086,6 @@ static void ib_uverbs_add_one(struct ib_device *device) return; } -static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext) -{ - struct ib_device *ib_dev = ibcontext->device; - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - down_write(&owning_mm->mmap_sem); - ib_dev->disassociate_ucontext(ibcontext); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); -} - static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { @@ -1162,39 +1101,24 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { - struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); + + /* We must release the mutex before going ahead and calling + * uverbs_cleanup_ufile, as it might end up indirectly calling + * uverbs_close, for example due to freeing the resources (e.g + * mmput). + */ mutex_unlock(&uverbs_dev->lists_mutex); - - mutex_lock(&file->cleanup_mutex); - ucontext = file->ucontext; - file->ucontext = NULL; - mutex_unlock(&file->cleanup_mutex); - - /* At this point ib_uverbs_close cannot be running - * ib_uverbs_cleanup_ufile - */ - if (ucontext) { - /* We must release the mutex before going ahead and - * calling disassociate_ucontext. disassociate_ucontext - * might end up indirectly calling uverbs_close, - * for example due to freeing the resources - * (e.g mmput). - */ - ib_uverbs_event_handler(&file->event_handler, &event); - ib_uverbs_disassociate_ucontext(ucontext); - mutex_lock(&file->cleanup_mutex); - ib_uverbs_cleanup_ufile(file, true); - mutex_unlock(&file->cleanup_mutex); - } + ib_uverbs_event_handler(&file->event_handler, &event); + uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE); + kref_put(&file->ref, ib_uverbs_release_file); mutex_lock(&uverbs_dev->lists_mutex); - kref_put(&file->ref, ib_uverbs_release_file); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 99bcf64a4762..42cbf8eabe9d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1479,6 +1479,11 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; + /* + * 'closing' can be read by the driver only during a destroy callback, + * it is set when we are closing the file descriptor and indicates + * that mm_sem may be locked. + */ int closing; bool cleanup_retryable;