forked from luck/tmp_suning_uos_patched
5d4600017b
[ Upstream commit de3510e52b0a398261271455562458003b8eea62 ] Memory backed or zoned null block devices may generate actual request timeout errors due to the submission path being blocked on memory allocation or zone locking. Unlike fake timeouts or injected timeouts, the request submission path will call blk_mq_complete_request() or blk_mq_end_request() for these real timeout errors, causing a double completion and use after free situation as the block layer timeout handler executes blk_mq_rq_timed_out() and __blk_mq_free_request() in blk_mq_check_expired(). This problem often triggers a NULL pointer dereference such as: BUG: kernel NULL pointer dereference, address: 0000000000000050 RIP: 0010:blk_mq_sched_mark_restart_hctx+0x5/0x20 ... Call Trace: dd_finish_request+0x56/0x80 blk_mq_free_request+0x37/0x130 null_handle_cmd+0xbf/0x250 [null_blk] ? null_queue_rq+0x67/0xd0 [null_blk] blk_mq_dispatch_rq_list+0x122/0x850 __blk_mq_do_dispatch_sched+0xbb/0x2c0 __blk_mq_sched_dispatch_requests+0x13d/0x190 blk_mq_sched_dispatch_requests+0x30/0x60 __blk_mq_run_hw_queue+0x49/0x90 process_one_work+0x26c/0x580 worker_thread+0x55/0x3c0 ? process_one_work+0x580/0x580 kthread+0x134/0x150 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x1f/0x30 This problem very often triggers when running the full btrfs xfstests on a memory-backed zoned null block device in a VM with limited amount of memory. Avoid this by executing blk_mq_complete_request() in null_timeout_rq() only for commands that are marked for a fake timeout completion using the fake_timeout boolean in struct nullb_cmd. For timeout errors injected through debugfs, the timeout handler will execute blk_mq_complete_request() as before. This is safe as the submission path does not execute complete requests in this case. In null_timeout_rq(), also make sure to set the command error field to BLK_STS_TIMEOUT and to propagate this error through to the request completion. 
Reported-by: Johannes Thumshirn <Johannes.Thumshirn@wdc.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Tested-by: Johannes Thumshirn <Johannes.Thumshirn@wdc.com> Reviewed-by: Johannes Thumshirn <Johannes.Thumshirn@wdc.com> Link: https://lore.kernel.org/r/20210331225244.126426-1-damien.lemoal@wdc.com Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Sasha Levin <sashal@kernel.org>
138 lines
4.2 KiB
C
138 lines
4.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __BLK_NULL_BLK_H
|
|
#define __BLK_NULL_BLK_H
|
|
|
|
#undef pr_fmt
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/blkdev.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/blk-mq.h>
|
|
#include <linux/hrtimer.h>
|
|
#include <linux/configfs.h>
|
|
#include <linux/badblocks.h>
|
|
#include <linux/fault-inject.h>
|
|
|
|
struct nullb_cmd {
|
|
struct request *rq;
|
|
struct bio *bio;
|
|
unsigned int tag;
|
|
blk_status_t error;
|
|
struct nullb_queue *nq;
|
|
struct hrtimer timer;
|
|
bool fake_timeout;
|
|
};
|
|
|
|
struct nullb_queue {
|
|
unsigned long *tag_map;
|
|
wait_queue_head_t wait;
|
|
unsigned int queue_depth;
|
|
struct nullb_device *dev;
|
|
unsigned int requeue_selection;
|
|
|
|
struct nullb_cmd *cmds;
|
|
};
|
|
|
|
struct nullb_device {
|
|
struct nullb *nullb;
|
|
struct config_item item;
|
|
struct radix_tree_root data; /* data stored in the disk */
|
|
struct radix_tree_root cache; /* disk cache data */
|
|
unsigned long flags; /* device flags */
|
|
unsigned int curr_cache;
|
|
struct badblocks badblocks;
|
|
|
|
unsigned int nr_zones;
|
|
unsigned int nr_zones_imp_open;
|
|
unsigned int nr_zones_exp_open;
|
|
unsigned int nr_zones_closed;
|
|
struct blk_zone *zones;
|
|
sector_t zone_size_sects;
|
|
spinlock_t zone_lock;
|
|
unsigned long *zone_locks;
|
|
|
|
unsigned long size; /* device size in MB */
|
|
unsigned long completion_nsec; /* time in ns to complete a request */
|
|
unsigned long cache_size; /* disk cache size in MB */
|
|
unsigned long zone_size; /* zone size in MB if device is zoned */
|
|
unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
|
|
unsigned int zone_nr_conv; /* number of conventional zones */
|
|
unsigned int zone_max_open; /* max number of open zones */
|
|
unsigned int zone_max_active; /* max number of active zones */
|
|
unsigned int submit_queues; /* number of submission queues */
|
|
unsigned int home_node; /* home node for the device */
|
|
unsigned int queue_mode; /* block interface */
|
|
unsigned int blocksize; /* block size */
|
|
unsigned int irqmode; /* IRQ completion handler */
|
|
unsigned int hw_queue_depth; /* queue depth */
|
|
unsigned int index; /* index of the disk, only valid with a disk */
|
|
unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
|
|
bool blocking; /* blocking blk-mq device */
|
|
bool use_per_node_hctx; /* use per-node allocation for hardware context */
|
|
bool power; /* power on/off the device */
|
|
bool memory_backed; /* if data is stored in memory */
|
|
bool discard; /* if support discard */
|
|
bool zoned; /* if device is zoned */
|
|
};
|
|
|
|
struct nullb {
|
|
struct nullb_device *dev;
|
|
struct list_head list;
|
|
unsigned int index;
|
|
struct request_queue *q;
|
|
struct gendisk *disk;
|
|
struct blk_mq_tag_set *tag_set;
|
|
struct blk_mq_tag_set __tag_set;
|
|
unsigned int queue_depth;
|
|
atomic_long_t cur_bytes;
|
|
struct hrtimer bw_timer;
|
|
unsigned long cache_flush_pos;
|
|
spinlock_t lock;
|
|
|
|
struct nullb_queue *queues;
|
|
unsigned int nr_queues;
|
|
char disk_name[DISK_NAME_LEN];
|
|
};
|
|
|
|
blk_status_t null_process_cmd(struct nullb_cmd *cmd,
|
|
enum req_opf op, sector_t sector,
|
|
unsigned int nr_sectors);
|
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
|
|
int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
|
|
int null_register_zoned_dev(struct nullb *nullb);
|
|
void null_free_zoned_dev(struct nullb_device *dev);
|
|
int null_report_zones(struct gendisk *disk, sector_t sector,
|
|
unsigned int nr_zones, report_zones_cb cb, void *data);
|
|
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
|
|
enum req_opf op, sector_t sector,
|
|
sector_t nr_sectors);
|
|
size_t null_zone_valid_read_len(struct nullb *nullb,
|
|
sector_t sector, unsigned int len);
|
|
#else
|
|
static inline int null_init_zoned_dev(struct nullb_device *dev,
|
|
struct request_queue *q)
|
|
{
|
|
pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
|
|
return -EINVAL;
|
|
}
|
|
static inline int null_register_zoned_dev(struct nullb *nullb)
|
|
{
|
|
return -ENODEV;
|
|
}
|
|
static inline void null_free_zoned_dev(struct nullb_device *dev) {}
|
|
static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
|
|
enum req_opf op, sector_t sector, sector_t nr_sectors)
|
|
{
|
|
return BLK_STS_NOTSUPP;
|
|
}
|
|
static inline size_t null_zone_valid_read_len(struct nullb *nullb,
|
|
sector_t sector,
|
|
unsigned int len)
|
|
{
|
|
return len;
|
|
}
|
|
#define null_report_zones NULL
|
|
#endif /* CONFIG_BLK_DEV_ZONED */
|
|
#endif /* __BLK_NULL_BLK_H */
|