kernel_optimize_test/net/rxrpc/conn_event.c
David Howells 5ac0d62226 rxrpc: Fix missing notification
Under some circumstances, rxrpc will fail a transmit a packet through the
underlying UDP socket (ie. UDP sendmsg returns an error).  This may result
in a call getting stuck.

In the instance being seen, where AFS tries to send a probe to the Volume
Location server, tracepoints show the UDP Tx failure (in this case returing
error 99 EADDRNOTAVAIL) and then nothing more:

 afs_make_vl_call: c=0000015d VL.GetCapabilities
 rxrpc_call: c=0000015d NWc u=1 sp=rxrpc_kernel_begin_call+0x106/0x170 [rxrpc] a=00000000dd89ee8a
 rxrpc_call: c=0000015d Gus u=2 sp=rxrpc_new_client_call+0x14f/0x580 [rxrpc] a=00000000e20e4b08
 rxrpc_call: c=0000015d SEE u=2 sp=rxrpc_activate_one_channel+0x7b/0x1c0 [rxrpc] a=00000000e20e4b08
 rxrpc_call: c=0000015d CON u=2 sp=rxrpc_kernel_begin_call+0x106/0x170 [rxrpc] a=00000000e20e4b08
 rxrpc_tx_fail: c=0000015d r=1 ret=-99 CallDataNofrag

The problem is that if the initial packet fails and the retransmission
timer hasn't been started, the call is set to completed and an error is
returned from rxrpc_send_data_packet() to rxrpc_queue_packet().  Though
rxrpc_instant_resend() is called, this does nothing because the call is
marked completed.

So rxrpc_notify_socket() isn't called and the error is passed back up to
rxrpc_send_data(), rxrpc_kernel_send_data() and thence to afs_make_call()
and afs_vl_get_capabilities() where it is simply ignored because it is
assumed that the result of a probe will be collected asynchronously.

Fileserver probing is similarly affected via afs_fs_get_capabilities().

Fix this by always issuing a notification in __rxrpc_set_call_completion()
if it shifts a call to the completed state, even if an error is also
returned to the caller through the function return value.

Also put in a little bit of optimisation to avoid taking the call
state_lock and disabling softirqs if the call is already in the completed
state and remove some now redundant rxrpc_notify_socket() calls.

Fixes: f5c17aaeb2 ("rxrpc: Calls should only have one terminal state")
Reported-by: Gerry Seidman <gerry@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
2020-06-05 13:36:35 +01:00

502 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* connection-level event handling
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"
/*
* Retransmit terminal ACK or ABORT of the previous call.
*/
static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
struct sk_buff *skb,
unsigned int channel)
{
struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
struct rxrpc_channel *chan;
struct msghdr msg;
struct kvec iov[3];
struct {
struct rxrpc_wire_header whdr;
union {
__be32 abort_code;
struct rxrpc_ackpacket ack;
};
} __attribute__((packed)) pkt;
struct rxrpc_ackinfo ack_info;
size_t len;
int ret, ioc;
u32 serial, mtu, call_id, padding;
_enter("%d", conn->debug_id);
chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
* ignore this packet.
*/
call_id = READ_ONCE(chan->last_call);
/* Sync with __rxrpc_disconnect_call() */
smp_rmb();
if (skb && call_id != sp->hdr.callNumber)
return;
msg.msg_name = &conn->params.peer->srx.transport;
msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
iov[0].iov_base = &pkt;
iov[0].iov_len = sizeof(pkt.whdr);
iov[1].iov_base = &padding;
iov[1].iov_len = 3;
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
pkt.whdr.callNumber = htonl(call_id);
pkt.whdr.seq = 0;
pkt.whdr.type = chan->last_type;
pkt.whdr.flags = conn->out_clientflag;
pkt.whdr.userStatus = 0;
pkt.whdr.securityIndex = conn->security_ix;
pkt.whdr._rsvd = 0;
pkt.whdr.serviceId = htons(conn->service_id);
len = sizeof(pkt.whdr);
switch (chan->last_type) {
case RXRPC_PACKET_TYPE_ABORT:
pkt.abort_code = htonl(chan->last_abort);
iov[0].iov_len += sizeof(pkt.abort_code);
len += sizeof(pkt.abort_code);
ioc = 1;
break;
case RXRPC_PACKET_TYPE_ACK:
mtu = conn->params.peer->if_mtu;
mtu -= conn->params.peer->hdrsize;
pkt.ack.bufferSpace = 0;
pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
pkt.ack.firstPacket = htonl(chan->last_seq + 1);
pkt.ack.previousPacket = htonl(chan->last_seq);
pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
pkt.ack.nAcks = 0;
ack_info.rxMTU = htonl(rxrpc_rx_mtu);
ack_info.maxMTU = htonl(mtu);
ack_info.rwind = htonl(rxrpc_rx_window_size);
ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
pkt.whdr.flags |= RXRPC_SLOW_START_OK;
padding = 0;
iov[0].iov_len += sizeof(pkt.ack);
len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
ioc = 3;
break;
default:
return;
}
/* Resync with __rxrpc_disconnect_call() and check that the last call
* didn't get advanced whilst we were filling out the packets.
*/
smp_rmb();
if (READ_ONCE(chan->last_call) != call_id)
return;
serial = atomic_inc_return(&conn->serial);
pkt.whdr.serial = htonl(serial);
switch (chan->last_type) {
case RXRPC_PACKET_TYPE_ABORT:
_proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code);
break;
case RXRPC_PACKET_TYPE_ACK:
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
pkt.ack.reason, 0);
_proto("Tx ACK %%%u [re]", serial);
break;
}
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
conn->params.peer->last_tx_at = ktime_get_seconds();
if (ret < 0)
trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
rxrpc_tx_point_call_final_resend);
else
trace_rxrpc_tx_packet(chan->call_debug_id, &pkt.whdr,
rxrpc_tx_point_call_final_resend);
_leave("");
}
/*
* pass a connection-level abort onto all calls on that connection
*/
static void rxrpc_abort_calls(struct rxrpc_connection *conn,
enum rxrpc_call_completion compl,
rxrpc_serial_t serial)
{
struct rxrpc_call *call;
int i;
_enter("{%d},%x", conn->debug_id, conn->abort_code);
spin_lock(&conn->channel_lock);
for (i = 0; i < RXRPC_MAXCALLS; i++) {
call = rcu_dereference_protected(
conn->channels[i].call,
lockdep_is_held(&conn->channel_lock));
if (call) {
if (compl == RXRPC_CALL_LOCALLY_ABORTED)
trace_rxrpc_abort(call->debug_id,
"CON", call->cid,
call->call_id, 0,
conn->abort_code,
conn->error);
else
trace_rxrpc_rx_abort(call, serial,
conn->abort_code);
rxrpc_set_call_completion(call, compl,
conn->abort_code,
conn->error);
}
}
spin_unlock(&conn->channel_lock);
_leave("");
}
/*
* generate a connection-level abort
*/
static int rxrpc_abort_connection(struct rxrpc_connection *conn,
int error, u32 abort_code)
{
struct rxrpc_wire_header whdr;
struct msghdr msg;
struct kvec iov[2];
__be32 word;
size_t len;
u32 serial;
int ret;
_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
/* generate a connection-level abort */
spin_lock_bh(&conn->state_lock);
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
spin_unlock_bh(&conn->state_lock);
_leave(" = 0 [already dead]");
return 0;
}
conn->error = error;
conn->abort_code = abort_code;
conn->state = RXRPC_CONN_LOCALLY_ABORTED;
spin_unlock_bh(&conn->state_lock);
msg.msg_name = &conn->params.peer->srx.transport;
msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
whdr.epoch = htonl(conn->proto.epoch);
whdr.cid = htonl(conn->proto.cid);
whdr.callNumber = 0;
whdr.seq = 0;
whdr.type = RXRPC_PACKET_TYPE_ABORT;
whdr.flags = conn->out_clientflag;
whdr.userStatus = 0;
whdr.securityIndex = conn->security_ix;
whdr._rsvd = 0;
whdr.serviceId = htons(conn->service_id);
word = htonl(conn->abort_code);
iov[0].iov_base = &whdr;
iov[0].iov_len = sizeof(whdr);
iov[1].iov_base = &word;
iov[1].iov_len = sizeof(word);
len = iov[0].iov_len + iov[1].iov_len;
serial = atomic_inc_return(&conn->serial);
rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
whdr.serial = htonl(serial);
_proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code);
ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_conn_abort);
_debug("sendmsg failed: %d", ret);
return -EAGAIN;
}
trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
conn->params.peer->last_tx_at = ktime_get_seconds();
_leave(" = 0");
return 0;
}
/*
* mark a call as being on a now-secured channel
* - must be called with BH's disabled.
*/
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
_enter("%p", call);
if (call) {
write_lock_bh(&call->state_lock);
if (call->state == RXRPC_CALL_SERVER_SECURING) {
call->state = RXRPC_CALL_SERVER_ACCEPTING;
rxrpc_notify_socket(call);
}
write_unlock_bh(&call->state_lock);
}
}
/*
* connection-level Rx packet processor
*/
static int rxrpc_process_event(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
__be32 wtmp;
u32 abort_code;
int loop, ret;
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
_leave(" = -ECONNABORTED [%u]", conn->state);
return -ECONNABORTED;
}
_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
case RXRPC_PACKET_TYPE_ACK:
rxrpc_conn_retransmit_call(conn, skb,
sp->hdr.cid & RXRPC_CHANNELMASK);
return 0;
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets for now. */
return 0;
case RXRPC_PACKET_TYPE_ABORT:
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&wtmp, sizeof(wtmp)) < 0) {
trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
tracepoint_string("bad_abort"));
return -EPROTO;
}
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
conn->error = -ECONNABORTED;
conn->abort_code = abort_code;
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
return conn->security->respond_to_challenge(conn, skb,
_abort_code);
case RXRPC_PACKET_TYPE_RESPONSE:
ret = conn->security->verify_response(conn, skb, _abort_code);
if (ret < 0)
return ret;
ret = conn->security->init_connection_security(conn);
if (ret < 0)
return ret;
ret = conn->security->prime_packet_security(conn);
if (ret < 0)
return ret;
spin_lock(&conn->channel_lock);
spin_lock(&conn->state_lock);
if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
conn->state = RXRPC_CONN_SERVICE;
spin_unlock(&conn->state_lock);
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
rxrpc_call_is_secure(
rcu_dereference_protected(
conn->channels[loop].call,
lockdep_is_held(&conn->channel_lock)));
} else {
spin_unlock(&conn->state_lock);
}
spin_unlock(&conn->channel_lock);
return 0;
default:
trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
tracepoint_string("bad_conn_pkt"));
return -EPROTO;
}
}
/*
* set up security and issue a challenge
*/
static void rxrpc_secure_connection(struct rxrpc_connection *conn)
{
u32 abort_code;
int ret;
_enter("{%d}", conn->debug_id);
ASSERT(conn->security_ix != 0);
ASSERT(conn->server_key);
if (conn->security->issue_challenge(conn) < 0) {
abort_code = RX_CALL_DEAD;
ret = -ENOMEM;
goto abort;
}
_leave("");
return;
abort:
_debug("abort %d, %d", ret, abort_code);
rxrpc_abort_connection(conn, ret, abort_code);
_leave(" [aborted]");
}
/*
* Process delayed final ACKs that we haven't subsumed into a subsequent call.
*/
static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
{
unsigned long j = jiffies, next_j;
unsigned int channel;
bool set;
again:
next_j = j + LONG_MAX;
set = false;
for (channel = 0; channel < RXRPC_MAXCALLS; channel++) {
struct rxrpc_channel *chan = &conn->channels[channel];
unsigned long ack_at;
if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
continue;
smp_rmb(); /* vs rxrpc_disconnect_client_call */
ack_at = READ_ONCE(chan->final_ack_at);
if (time_before(j, ack_at)) {
if (time_before(ack_at, next_j)) {
next_j = ack_at;
set = true;
}
continue;
}
if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel,
&conn->flags))
rxrpc_conn_retransmit_call(conn, NULL, channel);
}
j = jiffies;
if (time_before_eq(next_j, j))
goto again;
if (set)
rxrpc_reduce_conn_timer(conn, next_j);
}
/*
* connection-level event processor
*/
static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
{
struct sk_buff *skb;
u32 abort_code = RX_PROTOCOL_ERROR;
int ret;
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
/* Process delayed ACKs whose time has come. */
if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
rxrpc_process_delayed_final_acks(conn);
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
rxrpc_see_skb(skb, rxrpc_skb_seen);
ret = rxrpc_process_event(conn, skb, &abort_code);
switch (ret) {
case -EPROTO:
case -EKEYEXPIRED:
case -EKEYREJECTED:
goto protocol_error;
case -ENOMEM:
case -EAGAIN:
goto requeue_and_leave;
case -ECONNABORTED:
default:
rxrpc_free_skb(skb, rxrpc_skb_freed);
break;
}
}
return;
requeue_and_leave:
skb_queue_head(&conn->rx_queue, skb);
return;
protocol_error:
if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
rxrpc_free_skb(skb, rxrpc_skb_freed);
return;
}
void rxrpc_process_connection(struct work_struct *work)
{
struct rxrpc_connection *conn =
container_of(work, struct rxrpc_connection, processor);
rxrpc_see_connection(conn);
if (__rxrpc_use_local(conn->params.local)) {
rxrpc_do_process_connection(conn);
rxrpc_unuse_local(conn->params.local);
}
rxrpc_put_connection(conn);
_leave("");
return;
}