kernel_optimize_test/include/linux/virtio.h
Christian Borntraeger 4265f161b6 virtio: fix race in enable_cb
There is a race in virtio_net, dealing with disabling/enabling the callback.
I saw the following oops:

kernel BUG at /space/kvm/drivers/virtio/virtio_ring.c:218!
illegal operation: 0001 [#1] SMP
Modules linked in: sunrpc dm_mod
CPU: 2 Not tainted 2.6.25-rc1zlive-host-10623-gd358142-dirty #99
Process swapper (pid: 0, task: 000000000f85a610, ksp: 000000000f873c60)
Krnl PSW : 0404300180000000 00000000002b81a6 (vring_disable_cb+0x16/0x20)
           R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:3 PM:0 EA:3
Krnl GPRS: 0000000000000001 0000000000000001 0000000010005800 0000000000000001
           000000000f3a0900 000000000f85a610 0000000000000000 0000000000000000
           0000000000000000 000000000f870000 0000000000000000 0000000000001237
           000000000f3a0920 000000000010ff74 00000000002846f6 000000000fa0bcd8
Krnl Code: 00000000002b819a: a7110001           tmll    %r1,1
           00000000002b819e: a7840004           brc     8,2b81a6
           00000000002b81a2: a7f40001           brc     15,2b81a4
          >00000000002b81a6: a51b0001           oill    %r1,1
           00000000002b81aa: 40102000           sth     %r1,0(%r2)
           00000000002b81ae: 07fe               bcr     15,%r14
           00000000002b81b0: eb7ff0380024       stmg    %r7,%r15,56(%r15)
           00000000002b81b6: a7f13e00           tmll    %r15,15872
Call Trace:
([<000000000fa0bcd0>] 0xfa0bcd0)
 [<00000000002b8350>] vring_interrupt+0x5c/0x6c
 [<000000000010ab08>] do_extint+0xb8/0xf0
 [<0000000000110716>] ext_no_vtime+0x16/0x1a
 [<0000000000107e72>] cpu_idle+0x1c2/0x1e0

The problem can be triggered with a high amount of host->guest traffic.
I think its the following race:

poll says netif_rx_complete
poll calls enable_cb
enable_cb opens the interrupt mask
a new packet comes, an interrupt is triggered----\
enable_cb sees that there is more work           |
enable_cb disables the interrupt                 |
       .                                         V
       .                            interrupt is delivered
       .                            skb_recv_done does atomic napi test, ok
 some waiting                       disable_cb is called->check fails->bang!
       .
poll would do napi check
poll would do disable_cb

The fix is to let enable_cb not disable the interrupt again, but expect the
caller to do the cleanup if it returns false. In that case, the interrupt is
only disabled, if the napi test_set_bit was successful.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (cleaned up doco)
2008-03-17 22:58:21 +11:00

111 lines
3.7 KiB
C

#ifndef _LINUX_VIRTIO_H
#define _LINUX_VIRTIO_H
/* Everything a virtio driver needs to work with any particular virtio
* implementation. */
#include <linux/types.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/mod_devicetable.h>
/**
* virtqueue - a queue to register buffers for sending or receiving.
* @callback: the function to call when buffers are consumed (can be NULL).
* @vdev: the virtio device this queue was created for.
* @vq_ops: the operations for this virtqueue (see below).
* @priv: a pointer for the virtqueue implementation to use.
*/
struct virtqueue
{
void (*callback)(struct virtqueue *vq);
struct virtio_device *vdev;
struct virtqueue_ops *vq_ops;
void *priv;
};
/**
* virtqueue_ops - operations for virtqueue abstraction layer
* @add_buf: expose buffer to other end
* vq: the struct virtqueue we're talking about.
* sg: the description of the buffer(s).
* out_num: the number of sg readable by other side
* in_num: the number of sg which are writable (after readable ones)
* data: the token identifying the buffer.
* Returns 0 or an error.
* @kick: update after add_buf
* vq: the struct virtqueue
* After one or more add_buf calls, invoke this to kick the other side.
* @get_buf: get the next used buffer
* vq: the struct virtqueue we're talking about.
* len: the length written into the buffer
* Returns NULL or the "data" token handed to add_buf.
* @disable_cb: disable callbacks
* vq: the struct virtqueue we're talking about.
* @enable_cb: restart callbacks after disable_cb.
* vq: the struct virtqueue we're talking about.
* This re-enables callbacks; it returns "false" if there are pending
* buffers in the queue, to detect a possible race between the driver
* checking for more work, and enabling callbacks.
*
* Locking rules are straightforward: the driver is responsible for
* locking. No two operations may be invoked simultaneously.
*
* All operations can be called in any context.
*/
struct virtqueue_ops {
int (*add_buf)(struct virtqueue *vq,
struct scatterlist sg[],
unsigned int out_num,
unsigned int in_num,
void *data);
void (*kick)(struct virtqueue *vq);
void *(*get_buf)(struct virtqueue *vq, unsigned int *len);
void (*disable_cb)(struct virtqueue *vq);
bool (*enable_cb)(struct virtqueue *vq);
};
/**
* virtio_device - representation of a device using virtio
* @index: unique position on the virtio bus
* @dev: underlying device.
* @id: the device type identification (used to match it with a driver).
* @config: the configuration ops for this device.
* @priv: private pointer for the driver's use.
*/
struct virtio_device
{
int index;
struct device dev;
struct virtio_device_id id;
struct virtio_config_ops *config;
void *priv;
};
int register_virtio_device(struct virtio_device *dev);
void unregister_virtio_device(struct virtio_device *dev);
/**
* virtio_driver - operations for a virtio I/O driver
* @driver: underlying device driver (populate name and owner).
* @id_table: the ids serviced by this driver.
* @probe: the function to call when a device is found. Returns a token for
* remove, or PTR_ERR().
* @remove: the function when a device is removed.
* @config_changed: optional function to call when the device configuration
* changes; may be called in interrupt context.
*/
struct virtio_driver {
struct device_driver driver;
const struct virtio_device_id *id_table;
int (*probe)(struct virtio_device *dev);
void (*remove)(struct virtio_device *dev);
void (*config_changed)(struct virtio_device *dev);
};
int register_virtio_driver(struct virtio_driver *drv);
void unregister_virtio_driver(struct virtio_driver *drv);
#endif /* _LINUX_VIRTIO_H */