NFSv4: Add support for CB_RECALL_ANY for flexfiles layouts

When we receive a CB_RECALL_ANY that asks us to return flexfiles
layouts, we iterate through all the layouts and look at whether or
not there are active open file descriptors that might need them
for I/O. If there are no such descriptors, we return the layouts.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
Trond Myklebust 2020-02-18 15:58:31 -05:00
parent 7f156ef0bf
commit b5fdf8418c
7 changed files with 186 additions and 18 deletions

View File

@ -127,7 +127,9 @@ extern __be32 nfs4_callback_sequence(void *argp, void *resp,
#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
#define RCA4_TYPE_MASK_ALL 0xf31f
#define PNFS_FF_RCA4_TYPE_MASK_READ 16
#define PNFS_FF_RCA4_TYPE_MASK_RW 17
#define RCA4_TYPE_MASK_ALL 0x3f31f
struct cb_recallanyargs {
uint32_t craa_objs_to_keep;

View File

@ -597,6 +597,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
struct cb_recallanyargs *args = argp;
__be32 status;
fmode_t flags = 0;
bool schedule_manager = false;
status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* set in cb_sequence */
@ -619,6 +620,18 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
pnfs_recall_all_layouts(cps->clp);
if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) {
set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state);
schedule_manager = true;
}
if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_RW)) {
set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &cps->clp->cl_state);
schedule_manager = true;
}
if (schedule_manager)
nfs4_schedule_state_manager(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;

View File

@ -42,7 +42,9 @@ enum nfs4_client_state {
NFS4CLNT_LEASE_MOVED,
NFS4CLNT_DELEGATION_EXPIRED,
NFS4CLNT_RUN_MANAGER,
NFS4CLNT_DELEGRETURN_RUNNING,
NFS4CLNT_RECALL_RUNNING,
NFS4CLNT_RECALL_ANY_LAYOUT_READ,
NFS4CLNT_RECALL_ANY_LAYOUT_RW,
};
#define NFS4_RENEW_TIMEOUT 0x01

View File

@ -2524,6 +2524,21 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
}
return 0;
}
/*
 * Service any pending "recall any layout" requests recorded in
 * clp->cl_state.
 *
 * Each of the NFS4CLNT_RECALL_ANY_LAYOUT_READ / _RW flags is tested and
 * cleared in turn, and the matching iomode value is accumulated.  If at
 * least one flag was set, unused layouts of the resulting iomode are
 * returned for this client and NFS4CLNT_RUN_MANAGER is set in
 * clp->cl_state.
 */
static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
{
	int iomode = 0;

	if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &clp->cl_state))
		iomode += IOMODE_READ;
	if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &clp->cl_state))
		iomode += IOMODE_RW;

	/* Neither flag was pending: nothing to do */
	if (!iomode)
		return;

	/* Note: IOMODE_READ + IOMODE_RW == IOMODE_ANY */
	pnfs_layout_return_unused_byclid(clp, iomode);
	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
}
#else /* CONFIG_NFS_V4_1 */
static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
@ -2531,6 +2546,10 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
return 0;
}
/*
 * Stub for the !CONFIG_NFS_V4_1 branch of the surrounding #else/#endif:
 * it takes the same argument as the real implementation and does nothing.
 */
static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
{
}
#endif /* CONFIG_NFS_V4_1 */
static void nfs4_state_manager(struct nfs_client *clp)
@ -2635,12 +2654,13 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
}
clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
nfs4_layoutreturn_any_run(clp);
clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
}
/* Did we race with an attempt to give us more work? */

View File

@ -584,7 +584,9 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED);
TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER);
TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ);
TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW);
#define show_nfs4_clp_state(state) \
__print_flags(state, "|", \
@ -605,7 +607,9 @@ TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_RUNNING);
{ NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \
{ NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \
{ NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \
{ NFS4CLNT_DELEGRETURN_RUNNING, "DELEGRETURN_RUNNING" })
{ NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \
{ NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \
{ NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" })
TRACE_EVENT(nfs4_state_mgr,
TP_PROTO(

View File

@ -309,6 +309,16 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
}
}
/*
 * Try to take a reference on the inode that owns this layout header.
 *
 * Returns the inode handed back by igrab() on success.  If igrab()
 * returns NULL, NFS_LAYOUT_INODE_FREEING is set in lo->plh_flags and
 * NULL is returned.
 */
static struct inode *
pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = igrab(lo->plh_inode);

	if (!inode)
		set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
	return inode;
}
static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
u32 seq)
@ -782,7 +792,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
/* If the sb is being destroyed, just bail */
if (!nfs_sb_active(server->super))
break;
inode = igrab(lo->plh_inode);
inode = pnfs_grab_inode_layout_hdr(lo);
if (inode != NULL) {
if (test_and_clear_bit(NFS_LAYOUT_HASHED, &lo->plh_flags))
list_del_rcu(&lo->plh_layouts);
@ -795,7 +805,6 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
} else {
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags);
}
nfs_sb_deactive(server->super);
spin_lock(&clp->cl_lock);
@ -2434,29 +2443,26 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
return -ENOENT;
}
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg)
static void
pnfs_mark_layout_for_return(struct inode *inode,
const struct pnfs_layout_range *range)
{
struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
struct pnfs_layout_range range = {
.iomode = lseg->pls_range.iomode,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
struct pnfs_layout_hdr *lo;
bool return_now = false;
spin_lock(&inode->i_lock);
lo = NFS_I(inode)->layout;
if (!pnfs_layout_is_valid(lo)) {
spin_unlock(&inode->i_lock);
return;
}
pnfs_set_plh_return_info(lo, range.iomode, 0);
pnfs_set_plh_return_info(lo, range->iomode, 0);
/*
* mark all matching lsegs so that we are sure to have no live
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works.
*/
if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0) != -EBUSY) {
if (pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, range, 0) != -EBUSY) {
nfs4_stateid stateid;
enum pnfs_iomode iomode;
@ -2469,8 +2475,126 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
nfs_commit_inode(inode, 0);
}
}
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg)
{
struct pnfs_layout_range range = {
.iomode = lseg->pls_range.iomode,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
pnfs_mark_layout_for_return(inode, &range);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
static bool
pnfs_layout_can_be_returned(struct pnfs_layout_hdr *lo)
{
return pnfs_layout_is_valid(lo) &&
!test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) &&
!test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
}
/*
 * Return the first segment on lo->plh_segs that
 *  - has NFS_LSEG_VALID set,
 *  - does not have NFS_LSEG_LAYOUTRETURN set,
 *  - has an iomode equal to @iomode (any iomode if @iomode is IOMODE_ANY),
 *  - and intersects @range according to pnfs_lseg_range_intersecting().
 *
 * Returns NULL if no such segment exists.
 */
static struct pnfs_layout_segment *
pnfs_find_first_lseg(struct pnfs_layout_hdr *lo,
		     const struct pnfs_layout_range *range,
		     enum pnfs_iomode iomode)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		/* Skip segments that are invalid or already being returned */
		if (!test_bit(NFS_LSEG_VALID, &lseg->pls_flags) ||
		    test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
			continue;
		/* Skip segments of the wrong iomode unless any mode will do */
		if (iomode != IOMODE_ANY && lseg->pls_range.iomode != iomode)
			continue;
		if (pnfs_lseg_range_intersecting(&lseg->pls_range, range))
			return lseg;
	}
	return NULL;
}
/*
 * Decide whether the layout described by @lo is unused for @range.
 *
 * The layout must be returnable and contain at least one segment
 * matching @range and its iomode.  The FMODE_READ/FMODE_WRITE bits of
 * every open context on the inode that has a state attached are then
 * collected and filtered by the iomode of @range:
 *  - IOMODE_READ: write opens are ignored;
 *  - IOMODE_RW:   read opens are ignored if a matching IOMODE_READ
 *                 segment is also present;
 *  - otherwise:   nothing is filtered out.
 *
 * Returns true when no relevant open mode remains.
 *
 * NOTE(review): the open_files list is walked with
 * list_for_each_entry_rcu(); the caller is presumably expected to hold
 * the RCU read lock - confirm against callers.
 */
static bool
pnfs_should_return_unused_layout(struct pnfs_layout_hdr *lo,
				 const struct pnfs_layout_range *range)
{
	struct nfs_open_context *ctx;
	struct list_head *open_files;
	fmode_t in_use = 0;

	if (!pnfs_layout_can_be_returned(lo))
		return false;
	if (!pnfs_find_first_lseg(lo, range, range->iomode))
		return false;

	open_files = &NFS_I(lo->plh_inode)->open_files;
	list_for_each_entry_rcu(ctx, open_files, list) {
		if (!ctx->state)
			continue;
		in_use |= ctx->state->state & (FMODE_READ|FMODE_WRITE);
	}

	if (range->iomode == IOMODE_READ) {
		in_use &= ~FMODE_WRITE;
	} else if (range->iomode == IOMODE_RW) {
		if (pnfs_find_first_lseg(lo, range, IOMODE_READ))
			in_use &= ~FMODE_READ;
	}

	return in_use == 0;
}
/*
 * Scan server->layouts and mark for return every layout that
 * pnfs_should_return_unused_layout() reports as unused for the range
 * passed in @data (interpreted as a const struct pnfs_layout_range *).
 *
 * The list is walked under rcu_read_lock().  Whenever a layout is
 * selected, the RCU read lock is dropped before the layout is marked
 * for return, and the scan then starts over from the head of the list.
 *
 * Always returns 0.
 */
static int
pnfs_layout_return_unused_byserver(struct nfs_server *server, void *data)
{
const struct pnfs_layout_range *range = data;
struct pnfs_layout_hdr *lo;
struct inode *inode;
restart:
rcu_read_lock();
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
/* Cheap unlocked pre-filter: skip layouts that cannot be returned
 * or that already have a return requested.
 */
if (!pnfs_layout_can_be_returned(lo) ||
test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
continue;
inode = lo->plh_inode;
/* The "is it unused?" check is made with inode->i_lock held */
spin_lock(&inode->i_lock);
if (!pnfs_should_return_unused_layout(lo, range)) {
spin_unlock(&inode->i_lock);
continue;
}
spin_unlock(&inode->i_lock);
/* Take an inode reference; a NULL result means igrab() failed and
 * the helper has flagged the layout as NFS_LAYOUT_INODE_FREEING.
 */
inode = pnfs_grab_inode_layout_hdr(lo);
if (!inode)
continue;
rcu_read_unlock();
pnfs_mark_layout_for_return(inode, range);
iput(inode);
cond_resched();
/* NOTE(review): restarting is presumably needed because the list
 * position is no longer protected once the RCU read lock has been
 * dropped, and forward progress presumably relies on the layout
 * just marked being filtered out on the next pass - confirm.
 */
goto restart;
}
rcu_read_unlock();
return 0;
}
void
pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode)
{
struct pnfs_layout_range range = {
.iomode = iomode,
.offset = 0,
.length = NFS4_MAX_UINT64,
};
nfs_client_for_each_server(clp, pnfs_layout_return_unused_byserver,
&range);
}
void
pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
{

View File

@ -329,6 +329,9 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode);
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */