kernel_optimize_test/fs/lockd/svclock.c

/*
 * linux/fs/lockd/svclock.c
 *
 * Handling of server-side locks, mostly of the blocked variety.
 * This is the ugliest part of lockd because we tread on very thin ice.
 * GRANT and CANCEL calls may get stuck, meet in mid-flight, etc.
 * IMNSHO introducing the grant callback into the NLM protocol was one
 * of the worst ideas Sun ever had. Except maybe for the idea of doing
 * NFS file locking at all.
 *
 * I'm trying hard to avoid race conditions by protecting most accesses
 * to a file's list of blocked locks through a semaphore. The global
 * list of blocked locks is not protected in this fashion however.
 * Therefore, some functions (such as the RPC callback for the async grant
 * call) move blocked locks towards the head of the list *while some other
 * process might be traversing it*. This should not be a problem in
 * practice, because this will only cause functions traversing the list
 * to visit some blocks twice.
 *
 * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>

/* Debug facility tag for this file (presumably what dprintk() keys on). */
#define NLMDBG_FACILITY		NLMDBG_SVCLOCK

/*
 * Status nlmsvc_lock() hands back when the VFS reports EDEADLK.
 * With CONFIG_LOCKD_V4 this maps to nlm4_deadlock; without it the
 * status falls back to a plain nlm_lck_denied.
 */
#ifdef CONFIG_LOCKD_V4
#define nlm_deadlock	nlm4_deadlock
#else
#define nlm_deadlock	nlm_lck_denied
#endif

/* Forward declarations: insert and remove are used before their definitions */
static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
static int	nlmsvc_remove_block(struct nlm_block *block);

/* Callback table for the async GRANTED_MSG call; defined further down */
static const struct rpc_call_ops nlmsvc_grant_ops;

/*
 * The list of blocked locks to retry.
 *
 * Singly linked through b_next.  nlmsvc_insert_block() keeps it sorted
 * by b_when, with every NLM_NEVER entry behind the timed ones.
 */
static struct nlm_block *	nlm_blocked;

/*
 * Queue a block on the global nlm_blocked list.
 *
 * `when' is a delay in jiffies counted from now, or NLM_NEVER for a
 * block without a retry time.  Timed blocks are kept in order of their
 * absolute expiry (a new block goes behind existing ones that expire at
 * the same time); NLM_NEVER blocks are appended at the tail.  A block
 * that is already queued is unlinked first, so calling this again simply
 * re-schedules it.
 */
static void
nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
{
	struct nlm_block **link = &nlm_blocked;
	struct nlm_block *cur;

	dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);

	/* A block must never be on the list twice */
	if (block->b_queued)
		nlmsvc_remove_block(block);

	if (when == NLM_NEVER) {
		/* No retry time: walk all the way to the end */
		for (cur = *link; cur != NULL; cur = *link)
			link = &cur->b_next;
	} else {
		/* Convert to an absolute time, steering clear of the
		 * NLM_NEVER sentinel value */
		when += jiffies;
		if (when == NLM_NEVER)
			when++;

		/* Skip everything that expires no later than we do */
		for (cur = *link; cur != NULL; cur = *link) {
			if (!time_before_eq(cur->b_when, when))
				break;
			if (cur->b_when == NLM_NEVER)
				break;
			link = &cur->b_next;
		}
	}

	/* Splice in ahead of `cur' (NULL when we reached the tail) */
	block->b_next = cur;
	block->b_when = when;
	block->b_queued = 1;
	*link = block;
}

/*
 * Unlink a block from the global nlm_blocked list.
 *
 * Returns 1 when the block is off the list afterwards, i.e. it was not
 * marked as queued in the first place, or it was found and unlinked
 * (b_queued is cleared in that case).  Returns 0 when the block claims
 * to be queued but is not on the list; b_queued is left untouched then.
 */
static int
nlmsvc_remove_block(struct nlm_block *block)
{
	struct nlm_block **link = &nlm_blocked;

	if (!block->b_queued)
		return 1;

	while (*link != NULL) {
		if (*link == block) {
			*link = block->b_next;
			block->b_queued = 0;
			return 1;
		}
		link = &(*link)->b_next;
	}

	return 0;
}

/*
 * Search nlm_blocked for the block that belongs to `file' and whose
 * lock compares equal to `lock' (as judged by nlm_compare_locks).
 *
 * If `remove' is non-zero the match is also unlinked from the list and
 * its b_queued flag cleared.  Returns the block, or NULL if none matches.
 */
static struct nlm_block *
nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
{
	struct nlm_block	**link = &nlm_blocked;
	struct nlm_block	*cand;
	struct file_lock	*cand_fl;

	dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %Ld-%Ld ty=%d\n",
				file, lock->fl.fl_pid,
				(long long)lock->fl.fl_start,
				(long long)lock->fl.fl_end, lock->fl.fl_type);

	while ((cand = *link) != NULL) {
		cand_fl = &cand->b_call.a_args.lock.fl;
		dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
				cand->b_file, cand_fl->fl_pid,
				(long long)cand_fl->fl_start,
				(long long)cand_fl->fl_end, cand_fl->fl_type,
				nlmdbg_cookie2a(&cand->b_call.a_args.cookie));

		/* Not ours: step to the next link and keep looking */
		if (cand->b_file != file
		 || !nlm_compare_locks(cand_fl, &lock->fl)) {
			link = &cand->b_next;
			continue;
		}

		if (remove) {
			*link = cand->b_next;
			cand->b_queued = 0;
		}
		return cand;
	}

	return NULL;
}

/*
 * Two cookies match when they have the same length and the same
 * `len' bytes of data.  Returns 1 on a match, 0 otherwise.
 */
static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
{
	return a->len == b->len && memcmp(a->data, b->data, a->len) == 0;
}

/*
 * Find the queued block that carries the given NLM cookie and whose
 * host address matches `sin'.  Returns NULL if there is none.
 */
static inline struct nlm_block *
nlmsvc_find_block(struct nlm_cookie *cookie,  struct sockaddr_in *sin)
{
	struct nlm_block *cur = nlm_blocked;

	while (cur != NULL) {
		dprintk("cookie: head of blocked queue %p, block %p\n", 
			nlm_blocked, cur);
		if (nlm_cookie_match(&cur->b_call.a_args.cookie, cookie)
		 && nlm_cmp_addr(sin, &cur->b_host->h_addr))
			return cur;
		cur = cur->b_next;
	}

	return NULL;
}

/*
 * Allocate a block for a lock request that has to wait, and hook it
 * onto the file's list of blocks.  The block is NOT put on nlm_blocked
 * here; that is left to the caller.
 *
 * Returns the new block, or NULL if the host lookup, the allocation or
 * the setup of the grant arguments failed.  On failure the host
 * reference obtained here is dropped again.
 *
 * Note: we explicitly set the cookie of the grant reply to that of
 * the blocked lock request. The spec explicitly mentions that the client
 * should _not_ rely on the callback containing the same cookie as the
 * request, but (as I found out later) that's because some implementations
 * do just this. Never mind the standards committees, they support our
 * logging industries.
 */
static inline struct nlm_block *
nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
				struct nlm_lock *lock, struct nlm_cookie *cookie)
{
	struct nlm_block	*block;
	struct nlm_host		*host;

	/* The grant callback needs a handle on the requesting host */
	host = nlmclnt_lookup_host(&rqstp->rq_addr,
				rqstp->rq_prot, rqstp->rq_vers);
	if (host == NULL)
		return NULL;

	/* Get a zeroed block and initialize both embedded file_locks */
	block = kmalloc(sizeof(*block), GFP_KERNEL);
	if (block == NULL) {
		nlm_release_host(host);
		return NULL;
	}
	memset(block, 0, sizeof(*block));
	locks_init_lock(&block->b_call.a_args.lock.fl);
	locks_init_lock(&block->b_call.a_res.lock.fl);

	if (!nlmclnt_setgrantargs(&block->b_call, lock)) {
		kfree(block);
		nlm_release_host(host);
		return NULL;
	}

	/* Have the VFS call back into lockd for this lock, and reuse
	 * the request's cookie for the grant (see above) */
	block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
	block->b_call.a_args.cookie = *cookie;

	dprintk("lockd: created block %p...\n", block);

	/* Fill in the block proper */
	block->b_file   = file;
	block->b_host   = host;
	block->b_daemon = rqstp->rq_server;

	/* Push onto the front of the file's list of blocks */
	block->b_fnext  = file->f_blocks;
	file->f_blocks  = block;

	/* The grant callback is sent as an async RPC to the same host */
	block->b_call.a_host  = host;
	block->b_call.a_flags = RPC_TASK_ASYNC;

	return block;
}

/*
 * Get rid of a block.
 *
 * `unlock' is set to 1 if the lock was cancelled or the grant callback
 * failed; in that case the pending VFS request is withdrawn through
 * posix_unblock_lock() and its result is what this function returns
 * (0 otherwise).
 *
 * A block that is in the middle of a GRANT callback is not freed: it is
 * parked on nlm_blocked with no retry time and flagged b_done instead.
 *
 * It is the caller's responsibility to check whether the file
 * can be closed hereafter.
 */
static int
nlmsvc_delete_block(struct nlm_block *block, int unlock)
{
	struct nlm_file		*file = block->b_file;
	struct nlm_block	**link;
	int			status;

	dprintk("lockd: deleting block %p...\n", block);

	/* Take it off the global list first */
	nlmsvc_remove_block(block);

	status = unlock
		? posix_unblock_lock(file->f_file, &block->b_call.a_args.lock.fl)
		: 0;

	/* Still referenced by an outstanding GRANT call: defer the kill */
	if (block->b_incall) {
		nlmsvc_insert_block(block, NLM_NEVER);
		block->b_done = 1;
		return status;
	}

	/* Unhook from the file's list of blocks, if it is on there */
	link = &file->f_blocks;
	while (*link != NULL && *link != block)
		link = &(*link)->b_fnext;
	if (*link != NULL)
		*link = block->b_fnext;

	if (block->b_host)
		nlm_release_host(block->b_host);
	nlmclnt_freegrantargs(&block->b_call);
	kfree(block);
	return status;
}

/*
 * Walk a file's list of blocks under f_sema and apply `action':
 *   NLM_ACT_MARK   - flag the host of every block as in use
 *   NLM_ACT_UNLOCK - delete (and unblock) the blocks of `host', or of
 *                    all hosts if `host' is NULL
 * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
 * Always returns 0.
 */
int
nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
{
	struct nlm_block	*cur, *following;
	/* XXX: Will everything get cleaned up if we don't unlock here? */

	down(&file->f_sema);
	cur = file->f_blocks;
	while (cur != NULL) {
		/* Fetch the successor now; `cur' may be freed below */
		following = cur->b_fnext;

		if (action == NLM_ACT_MARK) {
			cur->b_host->h_inuse = 1;
		} else if (action == NLM_ACT_UNLOCK) {
			if (host == NULL || host == cur->b_host)
				nlmsvc_delete_block(cur, 1);
		}

		cur = following;
	}
	up(&file->f_sema);

	return 0;
}

/*
 * Attempt to establish a lock, and if it can't be granted, block it
 * if required.
 *
 * `wait' says whether the request may be queued when it conflicts;
 * `cookie' is stored in a newly created block for the later grant.
 *
 * Returns
 *   nlm_granted             the VFS accepted the lock
 *   nlm_deadlock            the VFS returned -EDEADLK
 *   nlm_lck_denied_nolocks  any other VFS error, or no block could be
 *                           created
 *   nlm_lck_denied          the lock conflicts and `wait' is zero
 *   nlm_lck_blocked         the lock conflicts and the request has been
 *                           queued on nlm_blocked
 */
u32
nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
{
	struct nlm_block	*block;
	int			error;
	u32			ret;

	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
				file->f_file->f_dentry->d_inode->i_sb->s_id,
				file->f_file->f_dentry->d_inode->i_ino,
				lock->fl.fl_type, lock->fl.fl_pid,
				(long long)lock->fl.fl_start,
				(long long)lock->fl.fl_end,
				wait);


	/* Get existing block (in case client is busy-waiting) */
	block = nlmsvc_lookup_block(file, lock, 0);

again:
	/* Lock file against concurrent access */
	down(&file->f_sema);

	error = posix_lock_file(file->f_file, &lock->fl);

	dprintk("lockd: posix_lock_file returned %d\n", error);

	/* Anything but -EAGAIN settles the request one way or the other,
	 * so a block we found or created for it is no longer needed. */
	if (error != -EAGAIN) {
		if (block)
			nlmsvc_delete_block(block, 0);
		up(&file->f_sema);

		switch(-error) {
		case 0:
			ret = nlm_granted;
			goto out;
		case EDEADLK:
			ret = nlm_deadlock;
			goto out;
		default:			/* includes ENOLCK */
			ret = nlm_lck_denied_nolocks;
			goto out;
		}
	}

	/* Conflict, and the client does not want to wait */
	if (!wait) {
		ret = nlm_lck_denied;
		goto out_unlock;
	}

	/* If we don't have a block, create and initialize it. Then
	 * retry because we may have slept in kmalloc. */
	/* We have to release f_sema as nlmsvc_create_block may try
	 * to claim it while doing host garbage collection */
	if (block == NULL) {
		up(&file->f_sema);
		dprintk("lockd: blocking on this lock (allocating).\n");
		if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
			return nlm_lck_denied_nolocks;
		goto again;
	}

	/* Append to list of blocked */
	nlmsvc_insert_block(block, NLM_NEVER);

	ret = nlm_lck_blocked;
out_unlock:
	up(&file->f_sema);
out:
	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
	return ret;
}

/*
 * Test for presence of a conflicting lock.
 *
 * Returns nlm_granted when posix_test_lock() reports no conflict.
 * Otherwise returns nlm_lck_denied and completes `conflock' (whose fl
 * member is presumably filled in by posix_test_lock()): a placeholder
 * caller name, no owner handle, and svid taken from the lock's pid.
 */
u32
nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
				       struct nlm_lock *conflock)
{
	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
				file->f_file->f_dentry->d_inode->i_sb->s_id,
				file->f_file->f_dentry->d_inode->i_ino,
				lock->fl.fl_type,
				(long long)lock->fl.fl_start,
				(long long)lock->fl.fl_end);

	if (!posix_test_lock(file->f_file, &lock->fl, &conflock->fl))
		return nlm_granted;

	dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
			conflock->fl.fl_type,
			(long long)conflock->fl.fl_start,
			(long long)conflock->fl.fl_end);

	conflock->caller = "somehost";	/* FIXME */
	conflock->oh.len = 0;		/* don't return OH info */
	conflock->svid = conflock->fl.fl_pid;

	return nlm_lck_denied;
}

/*
 * Remove a lock.
 * This implies a CANCEL call: We send a GRANT_MSG, the client replies
 * with a GRANT_RES call which gets lost, and calls UNLOCK immediately
 * afterwards. In this case the block will still be there, and hence
 * must be removed.
 *
 * Note that lock->fl.fl_type is overwritten with F_UNLCK.
 * Returns nlm_lck_denied_nolocks if the VFS reports an error,
 * nlm_granted otherwise.
 */
u32
nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
{
	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
				file->f_file->f_dentry->d_inode->i_sb->s_id,
				file->f_file->f_dentry->d_inode->i_ino,
				lock->fl.fl_pid,
				(long long)lock->fl.fl_start,
				(long long)lock->fl.fl_end);

	/* Drop a block that may still be pending for this lock; the
	 * outcome of the cancel does not influence our reply */
	nlmsvc_cancel_blocked(file, lock);

	/* Now turn the request into an unlock and hand it to the VFS */
	lock->fl.fl_type = F_UNLCK;
	if (posix_lock_file(file->f_file, &lock->fl) < 0)
		return nlm_lck_denied_nolocks;

	return nlm_granted;
}

/*
 * Cancel a previously blocked request.
 *
 * A cancel request always overrides any grant that may currently
 * be in progress.
 * The calling procedure must check whether the file can be closed.
 *
 * Returns nlm_lck_denied if deleting the block reported a non-zero
 * status, and nlm_granted otherwise - including the case where no
 * matching block exists.
 */
u32
nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
{
	struct nlm_block	*block;
	int			unblock_status = 0;

	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
				file->f_file->f_dentry->d_inode->i_sb->s_id,
				file->f_file->f_dentry->d_inode->i_ino,
				lock->fl.fl_pid,
				(long long)lock->fl.fl_start,
				(long long)lock->fl.fl_end);

	down(&file->f_sema);
	/* The lookup already unlinks the block from nlm_blocked */
	block = nlmsvc_lookup_block(file, lock, 1);
	if (block != NULL)
		unblock_status = nlmsvc_delete_block(block, 1);
	up(&file->f_sema);

	if (unblock_status)
		return nlm_lck_denied;
	return nlm_granted;
}

/*
 * Unblock a blocked lock request. This is a callback invoked from the
 * VFS layer when a lock on which we blocked is removed.
 *
 * The lock is not granted on the spot. The first block on nlm_blocked
 * whose lock matches `fl' is re-queued with a zero delay and the lockd
 * thread serving it is woken up to do the actual work. A notification
 * that matches no block only produces a warning.
 */
static void
nlmsvc_notify_blocked(struct file_lock *fl)
{
	struct nlm_block	*cur;

	dprintk("lockd: VFS unblock notification for block %p\n", fl);

	for (cur = nlm_blocked; cur != NULL; cur = cur->b_next) {
		if (!nlm_compare_locks(&cur->b_call.a_args.lock.fl, fl))
			continue;

		/* The re-queue reshuffles the list, so stop iterating */
		nlmsvc_insert_block(cur, 0);
		svc_wake_up(cur->b_daemon);
		return;
	}

	printk(KERN_WARNING "lockd: notification for unknown block!\n");
}

/*
 * Owner comparison for lockd's locks: two locks belong to the same
 * owner only if both fl_owner and fl_pid agree.  Returns 1 or 0.
 */
static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2)
{
	if (fl1->fl_owner != fl2->fl_owner)
		return 0;
	return fl1->fl_pid == fl2->fl_pid;
}

/*
 * Lock manager hooks installed as fl_lmops on the lock of every block
 * (see nlmsvc_create_block): how to compare owners, and whom to tell
 * when a lock we are blocked on goes away.
 */
struct lock_manager_operations nlmsvc_lock_operations = {
	.fl_compare_owner = nlmsvc_same_owner,
	.fl_notify = nlmsvc_notify_blocked,
};

/*
 * Try to claim a lock that was previously blocked.
 *
 * Note that we use both the RPC_GRANTED_MSG call _and_ an async
 * RPC thread when notifying the client. This seems like overkill...
 * Here's why:
 *  -	we don't want to use a synchronous RPC thread, otherwise
 *	we might find ourselves hanging on a dead portmapper.
 *  -	Some lockd implementations (e.g. HP) don't react to
 *	RPC_GRANTED calls; they seem to insist on RPC_GRANTED_MSG calls.
 *
 * Whatever the outcome, the block ends up back on nlm_blocked:
 *  -	still blocked: queued without a retry time, waiting for the
 *	next VFS notification
 *  -	unexpected VFS error: retried in 10 seconds
 *  -	granted: GRANTED_MSG is sent and the block is queued for a
 *	retransmission in 30 seconds
 */
static void
nlmsvc_grant_blocked(struct nlm_block *block)
{
	struct nlm_file		*file = block->b_file;
	struct nlm_lock		*lock = &block->b_call.a_args.lock;
	int			error;

	dprintk("lockd: grant blocked lock %p\n", block);

	/* First thing is lock the file */
	down(&file->f_sema);

	/* Unlink block request from list */
	nlmsvc_remove_block(block);

	/* If b_granted is true this means we've been here before.
	 * Just retry the grant callback, possibly refreshing the RPC
	 * binding */
	if (block->b_granted) {
		nlm_rebind_host(block->b_host);
		goto callback;
	}

	/* Try the lock operation again */
	error = posix_lock_file(file->f_file, &lock->fl);
	switch (error) {
	case 0:
		break;
	case -EAGAIN:
		dprintk("lockd: lock still blocked\n");
		nlmsvc_insert_block(block, NLM_NEVER);
		goto out_unlock;
	default:
		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
				-error, __FUNCTION__);
		nlmsvc_insert_block(block, 10 * HZ);
		goto out_unlock;
	}

callback:
	/* Lock was granted by VFS. */
	dprintk("lockd: GRANTing blocked lock.\n");
	block->b_granted = 1;
	/* While b_incall is set nlmsvc_delete_block() will not free
	 * the block */
	block->b_incall  = 1;

	/* Schedule next grant callback in 30 seconds */
	nlmsvc_insert_block(block, 30 * HZ);

	/* Call the client */
	/* The host reference taken here covers the async call; it is
	 * dropped right away if the call could not be started */
	nlm_get_host(block->b_call.a_host);
	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
						&nlmsvc_grant_ops) < 0)
		nlm_release_host(block->b_call.a_host);
out_unlock:
	up(&file->f_sema);
}

/*
 * This is the callback from the RPC layer when the NLM_GRANTED_MSG
 * RPC call has succeeded or timed out.
 * Like all RPC callbacks, it is invoked by the rpciod process, so it
 * better not sleep. Therefore, we put the blocked lock on the nlm_blocked
 * chain once more in order to have it removed by lockd itself (which can
 * then sleep on the file semaphore without disrupting e.g. the nfs client).
 *
 * `data' is the nlm_rqst the call was issued with.  The host reference
 * that nlmsvc_grant_blocked() took for the duration of the call is
 * dropped on every path out of here, including the one where the block
 * can no longer be found on nlm_blocked.
 */
static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
{
	struct nlm_rqst		*call = data;
	struct nlm_block	*block;
	unsigned long		timeout;
	struct sockaddr_in	*peer_addr = RPC_PEERADDR(task->tk_client);

	dprintk("lockd: GRANT_MSG RPC callback\n");
	dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
		nlmdbg_cookie2a(&call->a_args.cookie),
		NIPQUAD(peer_addr->sin_addr.s_addr));
	if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
		dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
			nlmdbg_cookie2a(&call->a_args.cookie),
			NIPQUAD(peer_addr->sin_addr.s_addr));
		/* The call is over either way, so the reference taken
		 * for it must not be kept */
		goto out_release;
	}

	/* Technically, we should down the file semaphore here. Since we
	 * move the block towards the head of the queue only, no harm
	 * can be done, though. */
	if (task->tk_status < 0) {
		/* RPC error: Re-insert for retransmission */
		timeout = 10 * HZ;
	} else if (block->b_done) {
		/* Block already removed, kill it for real */
		timeout = 0;
	} else {
		/* Call was successful, now wait for client callback */
		timeout = 60 * HZ;
	}
	nlmsvc_insert_block(block, timeout);
	svc_wake_up(block->b_daemon);
	block->b_incall = 0;

out_release:
	nlm_release_host(call->a_host);
}

/*
 * RPC callbacks for the async GRANTED_MSG call issued by
 * nlmsvc_grant_blocked(): only the completion hook is used.
 */
static const struct rpc_call_ops nlmsvc_grant_ops = {
	.rpc_call_done = nlmsvc_grant_callback,
};

/*
 * We received a GRANT_RES callback. Try to find the corresponding
 * block.
 *
 * If the client answered NLM_LCK_DENIED_GRACE_PERIOD the grant is
 * retried in 10 seconds.  For any other status the block is deleted;
 * unless the status is NLM_LCK_GRANTED the pending lock is unblocked
 * as well.
 *
 * The file is pinned (f_count) while we work on it, and f_sema is
 * released on every path - also when the block has vanished by the
 * time the semaphore has been acquired.
 */
void
nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status)
{
	struct nlm_block	*block;
	struct nlm_file		*file;

	dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", 
		*(unsigned int *)(cookie->data), 
		ntohl(rqstp->rq_addr.sin_addr.s_addr), status);
	if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr)))
		return;
	file = block->b_file;

	file->f_count++;
	down(&file->f_sema);

	/* Look again: the block may have gone away while we slept */
	block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
	if (block && status == NLM_LCK_DENIED_GRACE_PERIOD) {
		/* Try again in a couple of seconds */
		nlmsvc_insert_block(block, 10 * HZ);
		block = NULL;		/* nothing left to delete */
	}
	up(&file->f_sema);

	/* Lock is now held by client, or has been rejected.
	 * In both cases, the block should be removed.  As before,
	 * this is done after f_sema has been released. */
	if (block)
		nlmsvc_delete_block(block, status == NLM_LCK_GRANTED ? 0 : 1);

	nlm_release_file(file);
}

/*
 * Retry all blocked locks that have been notified. This is where lockd
 * picks up locks that can be granted, or grant notifications that must
 * be retransmitted.
 *
 * Blocks are taken from the head of nlm_blocked for as long as the head
 * is due.  Returns the number of jiffies until the new head becomes
 * due, or MAX_SCHEDULE_TIMEOUT if the list is empty or its head has no
 * retry time.
 */
unsigned long
nlmsvc_retry_blocked(void)
{
	struct nlm_block	*head;

	dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
			nlm_blocked,
			nlm_blocked? nlm_blocked->b_when : 0);

	for (;;) {
		/* Both calls below re-queue or free the block, so the
		 * head has to be fetched afresh on every round */
		head = nlm_blocked;
		if (head == NULL)
			break;
		if (head->b_when == NLM_NEVER)
			break;
		if (time_after(head->b_when,jiffies))
			break;

		dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
			head, head->b_when, head->b_done);
		if (head->b_done)
			nlmsvc_delete_block(head, 0);
		else
			nlmsvc_grant_blocked(head);
	}

	head = nlm_blocked;
	if (head == NULL || head->b_when == NLM_NEVER)
		return MAX_SCHEDULE_TIMEOUT;

	return (head->b_when - jiffies);
}