kernel_optimize_test/drivers/net/s2io.h

1025 lines
28 KiB
C
Raw Normal View History

/************************************************************************
* s2io.h: A Linux PCI-X Ethernet driver for Neterion 10GbE Server NIC
* Copyright(c) 2002-2005 Neterion Inc.
* This software may be used and distributed according to the terms of
* the GNU General Public License (GPL), incorporated herein by reference.
* Drivers based on or derived from this code fall under the GPL and must
* retain the authorship, copyright and license notice. This file is not
* a complete program and may only be used when the entire operating
* system is licensed under the GPL.
* See the file COPYING in this distribution for more information.
************************************************************************/
#ifndef _S2IO_H
#define _S2IO_H
#define TBD 0
#define BIT(loc) (0x8000000000000000ULL >> (loc))
#define vBIT(val, loc, sz) (((u64)val) << (64-loc-sz))
#define INV(d) ((d&0xff)<<24) | (((d>>8)&0xff)<<16) | (((d>>16)&0xff)<<8)| ((d>>24)&0xff)
#ifndef BOOL
#define BOOL int
#endif
#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif
#undef SUCCESS
#define SUCCESS 0
#define FAILURE -1
#define CHECKBIT(value, nbit) (value & (1 << nbit))
/* Maximum time to flicker LED when asked to identify NIC using ethtool */
#define MAX_FLICKER_TIME 60000 /* 60 Secs */
/* Maximum outstanding splits to be configured into xena. */
typedef enum xena_max_outstanding_splits {
XENA_ONE_SPLIT_TRANSACTION = 0,
XENA_TWO_SPLIT_TRANSACTION = 1,
XENA_THREE_SPLIT_TRANSACTION = 2,
XENA_FOUR_SPLIT_TRANSACTION = 3,
XENA_EIGHT_SPLIT_TRANSACTION = 4,
XENA_TWELVE_SPLIT_TRANSACTION = 5,
XENA_SIXTEEN_SPLIT_TRANSACTION = 6,
XENA_THIRTYTWO_SPLIT_TRANSACTION = 7
} xena_max_outstanding_splits;
#define XENA_MAX_OUTSTANDING_SPLITS(n) (n << 4)
/* OS concerned variables and constants */
#define WATCH_DOG_TIMEOUT 15*HZ
#define EFILL 0x1234
#define ALIGN_SIZE 127
#define PCIX_COMMAND_REGISTER 0x62
/*
* Debug related variables.
*/
/* different debug levels. */
#define ERR_DBG 0
#define INIT_DBG 1
#define INFO_DBG 2
#define TX_DBG 3
#define INTR_DBG 4
/* Global variable that defines the present debug level of the driver. */
static int debug_level = ERR_DBG;
/* DEBUG message print. */
#define DBG_PRINT(dbg_level, args...) if(!(debug_level<dbg_level)) printk(args)
/* Protocol assist features of the NIC */
#define L3_CKSUM_OK 0xFFFF
#define L4_CKSUM_OK 0xFFFF
#define S2IO_JUMBO_SIZE 9600
/* Driver statistics maintained by driver */
typedef struct {
unsigned long long single_ecc_errs;
unsigned long long double_ecc_errs;
unsigned long long parity_err_cnt;
unsigned long long serious_err_cnt;
unsigned long long soft_reset_cnt;
unsigned long long fifo_full_cnt;
unsigned long long ring_full_cnt;
[PATCH] S2io: Large Receive Offload (LRO) feature(v2) for Neterion (s2io) 10GbE Xframe PCI-X and PCI-E NICs Hi, Below is a patch for the Large Receive Offload feature. Please review and let us know your comments. LRO algorithm was described in an OLS 2005 presentation, located at ftp.s2io.com user: linuxdocs password: HALdocs The same ftp site has Programming Manual for Xframe-I ASIC. LRO feature is supported on Neterion Xframe-I, Xframe-II and Xframe-Express 10GbE NICs. Brief description: The Large Receive Offload(LRO) feature is a stateless offload that is complementary to TSO feature but on the receive path. The idea is to combine and collapse(upto 64K maximum) in the driver, in-sequence TCP packets belonging to the same session. It is mainly designed to improve 1500 mtu receive performance, since Jumbo frame performance is already close to 10GbE line rate. Some performance numbers are attached below. Implementation details: 1. Handle packet chains from multiple sessions(current default MAX_LRO_SESSSIONS=32). 2. Examine each packet for eligiblity to aggregate. A packet is considered eligible if it meets all the below criteria. a. It is a TCP/IP packet and L2 type is not LLC or SNAP. b. The packet has no checksum errors(L3 and L4). c. There are no IP options. The only TCP option supported is timestamps. d. Search and locate the LRO object corresponding to this socket and ensure packet is in TCP sequence. e. It's not a special packet(SYN, FIN, RST, URG, PSH etc. flags are not set). f. TCP payload is non-zero(It's not a pure ACK). g. It's not an IP-fragmented packet. 3. If a packet is found eligible, the LRO object is updated with information such as next sequence number expected, current length of aggregated packet and so on. If not eligible or max packets reached, update IP and TCP headers of first packet in the chain and pass it up to stack. 4. The frag_list in skb structure is used to chain packets into one large packet. Kernel changes required: None Performance results: Main focus of the initial testing was on 1500 mtu receiver, since this is a bottleneck not covered by the existing stateless offloads. There are couple disclaimers about the performance results below: 1. Your mileage will vary!!!! We initially concentrated on couple pci-x 2.0 platforms that are powerful enough to push 10 GbE NIC and do not have bottlenecks other than cpu%; testing on other platforms is still in progress. On some lower end systems we are seeing lower gains. 2. Current LRO implementation is still (for the most part) software based, and therefore performance potential of the feature is far from being realized. Full hw implementation of LRO is expected in the next version of Xframe ASIC. Performance delta(with MTU=1500) going from LRO disabled to enabled: IBM 2-way Xeon (x366) : 3.5 to 7.1 Gbps 2-way Opteron : 4.5 to 6.1 Gbps Signed-off-by: Ravinandan Arakali <ravinandan.arakali@neterion.com> Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
2006-01-26 03:53:07 +08:00
/* LRO statistics */
unsigned long long clubbed_frms_cnt;
unsigned long long sending_both;
unsigned long long outof_sequence_pkts;
unsigned long long flush_max_pkts;
unsigned long long sum_avg_pkts_aggregated;
unsigned long long num_aggregations;
} swStat_t;
/* Xpak releated alarm and warnings */
typedef struct {
u64 alarm_transceiver_temp_high;
u64 alarm_transceiver_temp_low;
u64 alarm_laser_bias_current_high;
u64 alarm_laser_bias_current_low;
u64 alarm_laser_output_power_high;
u64 alarm_laser_output_power_low;
u64 warn_transceiver_temp_high;
u64 warn_transceiver_temp_low;
u64 warn_laser_bias_current_high;
u64 warn_laser_bias_current_low;
u64 warn_laser_output_power_high;
u64 warn_laser_output_power_low;
u64 xpak_regs_stat;
u32 xpak_timer_count;
} xpakStat_t;
/* The statistics block of Xena */
typedef struct stat_block {
/* Tx MAC statistics counters. */
__le32 tmac_data_octets;
__le32 tmac_frms;
__le64 tmac_drop_frms;
__le32 tmac_bcst_frms;
__le32 tmac_mcst_frms;
__le64 tmac_pause_ctrl_frms;
__le32 tmac_ucst_frms;
__le32 tmac_ttl_octets;
__le32 tmac_any_err_frms;
__le32 tmac_nucst_frms;
__le64 tmac_ttl_less_fb_octets;
__le64 tmac_vld_ip_octets;
__le32 tmac_drop_ip;
__le32 tmac_vld_ip;
__le32 tmac_rst_tcp;
__le32 tmac_icmp;
__le64 tmac_tcp;
__le32 reserved_0;
__le32 tmac_udp;
/* Rx MAC Statistics counters. */
__le32 rmac_data_octets;
__le32 rmac_vld_frms;
__le64 rmac_fcs_err_frms;
__le64 rmac_drop_frms;
__le32 rmac_vld_bcst_frms;
__le32 rmac_vld_mcst_frms;
__le32 rmac_out_rng_len_err_frms;
__le32 rmac_in_rng_len_err_frms;
__le64 rmac_long_frms;
__le64 rmac_pause_ctrl_frms;
__le64 rmac_unsup_ctrl_frms;
__le32 rmac_accepted_ucst_frms;
__le32 rmac_ttl_octets;
__le32 rmac_discarded_frms;
__le32 rmac_accepted_nucst_frms;
__le32 reserved_1;
__le32 rmac_drop_events;
__le64 rmac_ttl_less_fb_octets;
__le64 rmac_ttl_frms;
__le64 reserved_2;
__le32 rmac_usized_frms;
__le32 reserved_3;
__le32 rmac_frag_frms;
__le32 rmac_osized_frms;
__le32 reserved_4;
__le32 rmac_jabber_frms;
__le64 rmac_ttl_64_frms;
__le64 rmac_ttl_65_127_frms;
__le64 reserved_5;
__le64 rmac_ttl_128_255_frms;
__le64 rmac_ttl_256_511_frms;
__le64 reserved_6;
__le64 rmac_ttl_512_1023_frms;
__le64 rmac_ttl_1024_1518_frms;
__le32 rmac_ip;
__le32 reserved_7;
__le64 rmac_ip_octets;
__le32 rmac_drop_ip;
__le32 rmac_hdr_err_ip;
__le32 reserved_8;
__le32 rmac_icmp;
__le64 rmac_tcp;
__le32 rmac_err_drp_udp;
__le32 rmac_udp;
__le64 rmac_xgmii_err_sym;
__le64 rmac_frms_q0;
__le64 rmac_frms_q1;
__le64 rmac_frms_q2;
__le64 rmac_frms_q3;
__le64 rmac_frms_q4;
__le64 rmac_frms_q5;
__le64 rmac_frms_q6;
__le64 rmac_frms_q7;
__le16 rmac_full_q3;
__le16 rmac_full_q2;
__le16 rmac_full_q1;
__le16 rmac_full_q0;
__le16 rmac_full_q7;
__le16 rmac_full_q6;
__le16 rmac_full_q5;
__le16 rmac_full_q4;
__le32 reserved_9;
__le32 rmac_pause_cnt;
__le64 rmac_xgmii_data_err_cnt;
__le64 rmac_xgmii_ctrl_err_cnt;
__le32 rmac_err_tcp;
__le32 rmac_accepted_ip;
/* PCI/PCI-X Read transaction statistics. */
__le32 new_rd_req_cnt;
__le32 rd_req_cnt;
__le32 rd_rtry_cnt;
__le32 new_rd_req_rtry_cnt;
/* PCI/PCI-X Write/Read transaction statistics. */
__le32 wr_req_cnt;
__le32 wr_rtry_rd_ack_cnt;
__le32 new_wr_req_rtry_cnt;
__le32 new_wr_req_cnt;
__le32 wr_disc_cnt;
__le32 wr_rtry_cnt;
/* PCI/PCI-X Write / DMA Transaction statistics. */
__le32 txp_wr_cnt;
__le32 rd_rtry_wr_ack_cnt;
__le32 txd_wr_cnt;
__le32 txd_rd_cnt;
__le32 rxd_wr_cnt;
__le32 rxd_rd_cnt;
__le32 rxf_wr_cnt;
__le32 txf_rd_cnt;
/* Tx MAC statistics overflow counters. */
__le32 tmac_data_octets_oflow;
__le32 tmac_frms_oflow;
__le32 tmac_bcst_frms_oflow;
__le32 tmac_mcst_frms_oflow;
__le32 tmac_ucst_frms_oflow;
__le32 tmac_ttl_octets_oflow;
__le32 tmac_any_err_frms_oflow;
__le32 tmac_nucst_frms_oflow;
__le64 tmac_vlan_frms;
__le32 tmac_drop_ip_oflow;
__le32 tmac_vld_ip_oflow;
__le32 tmac_rst_tcp_oflow;
__le32 tmac_icmp_oflow;
__le32 tpa_unknown_protocol;
__le32 tmac_udp_oflow;
__le32 reserved_10;
__le32 tpa_parse_failure;
/* Rx MAC Statistics overflow counters. */
__le32 rmac_data_octets_oflow;
__le32 rmac_vld_frms_oflow;
__le32 rmac_vld_bcst_frms_oflow;
__le32 rmac_vld_mcst_frms_oflow;
__le32 rmac_accepted_ucst_frms_oflow;
__le32 rmac_ttl_octets_oflow;
__le32 rmac_discarded_frms_oflow;
__le32 rmac_accepted_nucst_frms_oflow;
__le32 rmac_usized_frms_oflow;
__le32 rmac_drop_events_oflow;
__le32 rmac_frag_frms_oflow;
__le32 rmac_osized_frms_oflow;
__le32 rmac_ip_oflow;
__le32 rmac_jabber_frms_oflow;
__le32 rmac_icmp_oflow;
__le32 rmac_drop_ip_oflow;
__le32 rmac_err_drp_udp_oflow;
__le32 rmac_udp_oflow;
__le32 reserved_11;
__le32 rmac_pause_cnt_oflow;
__le64 rmac_ttl_1519_4095_frms;
__le64 rmac_ttl_4096_8191_frms;
__le64 rmac_ttl_8192_max_frms;
__le64 rmac_ttl_gt_max_frms;
__le64 rmac_osized_alt_frms;
__le64 rmac_jabber_alt_frms;
__le64 rmac_gt_max_alt_frms;
__le64 rmac_vlan_frms;
__le32 rmac_len_discard;
__le32 rmac_fcs_discard;
__le32 rmac_pf_discard;
__le32 rmac_da_discard;
__le32 rmac_red_discard;
__le32 rmac_rts_discard;
__le32 reserved_12;
__le32 rmac_ingm_full_discard;
__le32 reserved_13;
__le32 rmac_accepted_ip_oflow;
__le32 reserved_14;
__le32 link_fault_cnt;
u8 buffer[20];
swStat_t sw_stat;
xpakStat_t xpak_stat;
} StatInfo_t;
/*
* Structures representing different init time configuration
* parameters of the NIC.
*/
#define MAX_TX_FIFOS 8
#define MAX_RX_RINGS 8
/* FIFO mappings for all possible number of fifos configured */
static int fifo_map[][MAX_TX_FIFOS] = {
{0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 1, 1, 1},
{0, 0, 0, 1, 1, 1, 2, 2},
{0, 0, 1, 1, 2, 2, 3, 3},
{0, 0, 1, 1, 2, 2, 3, 4},
{0, 0, 1, 1, 2, 3, 4, 5},
{0, 0, 1, 2, 3, 4, 5, 6},
{0, 1, 2, 3, 4, 5, 6, 7},
};
/* Maintains Per FIFO related information. */
typedef struct tx_fifo_config {
#define MAX_AVAILABLE_TXDS 8192
u32 fifo_len; /* specifies len of FIFO upto 8192, ie no of TxDLs */
/* Priority definition */
#define TX_FIFO_PRI_0 0 /*Highest */
#define TX_FIFO_PRI_1 1
#define TX_FIFO_PRI_2 2
#define TX_FIFO_PRI_3 3
#define TX_FIFO_PRI_4 4
#define TX_FIFO_PRI_5 5
#define TX_FIFO_PRI_6 6
#define TX_FIFO_PRI_7 7 /*lowest */
u8 fifo_priority; /* specifies pointer level for FIFO */
/* user should not set twos fifos with same pri */
u8 f_no_snoop;
#define NO_SNOOP_TXD 0x01
#define NO_SNOOP_TXD_BUFFER 0x02
} tx_fifo_config_t;
/* Maintains per Ring related information */
typedef struct rx_ring_config {
u32 num_rxd; /*No of RxDs per Rx Ring */
#define RX_RING_PRI_0 0 /* highest */
#define RX_RING_PRI_1 1
#define RX_RING_PRI_2 2
#define RX_RING_PRI_3 3
#define RX_RING_PRI_4 4
#define RX_RING_PRI_5 5
#define RX_RING_PRI_6 6
#define RX_RING_PRI_7 7 /* lowest */
u8 ring_priority; /*Specifies service priority of ring */
/* OSM should not set any two rings with same priority */
u8 ring_org; /*Organization of ring */
#define RING_ORG_BUFF1 0x01
#define RX_RING_ORG_BUFF3 0x03
#define RX_RING_ORG_BUFF5 0x05
u8 f_no_snoop;
#define NO_SNOOP_RXD 0x01
#define NO_SNOOP_RXD_BUFFER 0x02
} rx_ring_config_t;
/* This structure provides contains values of the tunable parameters
* of the H/W
*/
struct config_param {
/* Tx Side */
u32 tx_fifo_num; /*Number of Tx FIFOs */
u8 fifo_mapping[MAX_TX_FIFOS];
tx_fifo_config_t tx_cfg[MAX_TX_FIFOS]; /*Per-Tx FIFO config */
u32 max_txds; /*Max no. of Tx buffer descriptor per TxDL */
u64 tx_intr_type;
/* Specifies if Tx Intr is UTILZ or PER_LIST type. */
/* Rx Side */
u32 rx_ring_num; /*Number of receive rings */
#define MAX_RX_BLOCKS_PER_RING 150
rx_ring_config_t rx_cfg[MAX_RX_RINGS]; /*Per-Rx Ring config */
u8 bimodal; /*Flag for setting bimodal interrupts*/
#define HEADER_ETHERNET_II_802_3_SIZE 14
#define HEADER_802_2_SIZE 3
#define HEADER_SNAP_SIZE 5
#define HEADER_VLAN_SIZE 4
#define MIN_MTU 46
#define MAX_PYLD 1500
#define MAX_MTU (MAX_PYLD+18)
#define MAX_MTU_VLAN (MAX_PYLD+22)
#define MAX_PYLD_JUMBO 9600
#define MAX_MTU_JUMBO (MAX_PYLD_JUMBO+18)
#define MAX_MTU_JUMBO_VLAN (MAX_PYLD_JUMBO+22)
u16 bus_speed;
};
/* Structure representing MAC Addrs */
typedef struct mac_addr {
u8 mac_addr[ETH_ALEN];
} macaddr_t;
/* Structure that represent every FIFO element in the BAR1
* Address location.
*/
typedef struct _TxFIFO_element {
u64 TxDL_Pointer;
u64 List_Control;
#define TX_FIFO_LAST_TXD_NUM( val) vBIT(val,0,8)
#define TX_FIFO_FIRST_LIST BIT(14)
#define TX_FIFO_LAST_LIST BIT(15)
#define TX_FIFO_FIRSTNLAST_LIST vBIT(3,14,2)
#define TX_FIFO_SPECIAL_FUNC BIT(23)
#define TX_FIFO_DS_NO_SNOOP BIT(31)
#define TX_FIFO_BUFF_NO_SNOOP BIT(30)
} TxFIFO_element_t;
/* Tx descriptor structure */
typedef struct _TxD {
u64 Control_1;
/* bit mask */
#define TXD_LIST_OWN_XENA BIT(7)
#define TXD_T_CODE (BIT(12)|BIT(13)|BIT(14)|BIT(15))
#define TXD_T_CODE_OK(val) (|(val & TXD_T_CODE))
#define GET_TXD_T_CODE(val) ((val & TXD_T_CODE)<<12)
#define TXD_GATHER_CODE (BIT(22) | BIT(23))
#define TXD_GATHER_CODE_FIRST BIT(22)
#define TXD_GATHER_CODE_LAST BIT(23)
#define TXD_TCP_LSO_EN BIT(30)
#define TXD_UDP_COF_EN BIT(31)
#define TXD_UFO_EN BIT(31) | BIT(30)
#define TXD_TCP_LSO_MSS(val) vBIT(val,34,14)
#define TXD_UFO_MSS(val) vBIT(val,34,14)
#define TXD_BUFFER0_SIZE(val) vBIT(val,48,16)
u64 Control_2;
#define TXD_TX_CKO_CONTROL (BIT(5)|BIT(6)|BIT(7))
#define TXD_TX_CKO_IPV4_EN BIT(5)
#define TXD_TX_CKO_TCP_EN BIT(6)
#define TXD_TX_CKO_UDP_EN BIT(7)
#define TXD_VLAN_ENABLE BIT(15)
#define TXD_VLAN_TAG(val) vBIT(val,16,16)
#define TXD_INT_NUMBER(val) vBIT(val,34,6)
#define TXD_INT_TYPE_PER_LIST BIT(47)
#define TXD_INT_TYPE_UTILZ BIT(46)
#define TXD_SET_MARKER vBIT(0x6,0,4)
u64 Buffer_Pointer;
u64 Host_Control; /* reserved for host */
} TxD_t;
/* Structure to hold the phy and virt addr of every TxDL. */
typedef struct list_info_hold {
dma_addr_t list_phy_addr;
void *list_virt_addr;
} list_info_hold_t;
/* Rx descriptor structure for 1 buffer mode */
typedef struct _RxD_t {
u64 Host_Control; /* reserved for host */
u64 Control_1;
#define RXD_OWN_XENA BIT(7)
#define RXD_T_CODE (BIT(12)|BIT(13)|BIT(14)|BIT(15))
#define RXD_FRAME_PROTO vBIT(0xFFFF,24,8)
#define RXD_FRAME_PROTO_IPV4 BIT(27)
#define RXD_FRAME_PROTO_IPV6 BIT(28)
#define RXD_FRAME_IP_FRAG BIT(29)
#define RXD_FRAME_PROTO_TCP BIT(30)
#define RXD_FRAME_PROTO_UDP BIT(31)
#define TCP_OR_UDP_FRAME (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP)
#define RXD_GET_L3_CKSUM(val) ((u16)(val>> 16) & 0xFFFF)
#define RXD_GET_L4_CKSUM(val) ((u16)(val) & 0xFFFF)
u64 Control_2;
#define THE_RXD_MARK 0x3
#define SET_RXD_MARKER vBIT(THE_RXD_MARK, 0, 2)
#define GET_RXD_MARKER(ctrl) ((ctrl & SET_RXD_MARKER) >> 62)
#define MASK_VLAN_TAG vBIT(0xFFFF,48,16)
#define SET_VLAN_TAG(val) vBIT(val,48,16)
#define SET_NUM_TAG(val) vBIT(val,16,32)
} RxD_t;
/* Rx descriptor structure for 1 buffer mode */
typedef struct _RxD1_t {
struct _RxD_t h;
#define MASK_BUFFER0_SIZE_1 vBIT(0x3FFF,2,14)
#define SET_BUFFER0_SIZE_1(val) vBIT(val,2,14)
#define RXD_GET_BUFFER0_SIZE_1(_Control_2) \
(u16)((_Control_2 & MASK_BUFFER0_SIZE_1) >> 48)
u64 Buffer0_ptr;
} RxD1_t;
/* Rx descriptor structure for 3 or 2 buffer mode */
typedef struct _RxD3_t {
struct _RxD_t h;
#define MASK_BUFFER0_SIZE_3 vBIT(0xFF,2,14)
#define MASK_BUFFER1_SIZE_3 vBIT(0xFFFF,16,16)
#define MASK_BUFFER2_SIZE_3 vBIT(0xFFFF,32,16)
#define SET_BUFFER0_SIZE_3(val) vBIT(val,8,8)
#define SET_BUFFER1_SIZE_3(val) vBIT(val,16,16)
#define SET_BUFFER2_SIZE_3(val) vBIT(val,32,16)
#define RXD_GET_BUFFER0_SIZE_3(Control_2) \
(u8)((Control_2 & MASK_BUFFER0_SIZE_3) >> 48)
#define RXD_GET_BUFFER1_SIZE_3(Control_2) \
(u16)((Control_2 & MASK_BUFFER1_SIZE_3) >> 32)
#define RXD_GET_BUFFER2_SIZE_3(Control_2) \
(u16)((Control_2 & MASK_BUFFER2_SIZE_3) >> 16)
#define BUF0_LEN 40
#define BUF1_LEN 1
u64 Buffer0_ptr;
u64 Buffer1_ptr;
u64 Buffer2_ptr;
} RxD3_t;
/* Structure that represents the Rx descriptor block which contains
* 128 Rx descriptors.
*/
typedef struct _RxD_block {
#define MAX_RXDS_PER_BLOCK_1 127
RxD1_t rxd[MAX_RXDS_PER_BLOCK_1];
u64 reserved_0;
#define END_OF_BLOCK 0xFEFFFFFFFFFFFFFFULL
u64 reserved_1; /* 0xFEFFFFFFFFFFFFFF to mark last
* Rxd in this blk */
u64 reserved_2_pNext_RxD_block; /* Logical ptr to next */
u64 pNext_RxD_Blk_physical; /* Buff0_ptr.In a 32 bit arch
* the upper 32 bits should
* be 0 */
} RxD_block_t;
#define SIZE_OF_BLOCK 4096
#define RXD_MODE_1 0
#define RXD_MODE_3A 1
#define RXD_MODE_3B 2
/* Structure to hold virtual addresses of Buf0 and Buf1 in
* 2buf mode. */
typedef struct bufAdd {
void *ba_0_org;
void *ba_1_org;
void *ba_0;
void *ba_1;
} buffAdd_t;
/* Structure which stores all the MAC control parameters */
/* This structure stores the offset of the RxD in the ring
* from which the Rx Interrupt processor can start picking
* up the RxDs for processing.
*/
typedef struct _rx_curr_get_info_t {
u32 block_index;
u32 offset;
u32 ring_len;
} rx_curr_get_info_t;
typedef rx_curr_get_info_t rx_curr_put_info_t;
/* This structure stores the offset of the TxDl in the FIFO
* from which the Tx Interrupt processor can start picking
* up the TxDLs for send complete interrupt processing.
*/
typedef struct {
u32 offset;
u32 fifo_len;
} tx_curr_get_info_t;
typedef tx_curr_get_info_t tx_curr_put_info_t;
typedef struct rxd_info {
void *virt_addr;
dma_addr_t dma_addr;
}rxd_info_t;
/* Structure that holds the Phy and virt addresses of the Blocks */
typedef struct rx_block_info {
void *block_virt_addr;
dma_addr_t block_dma_addr;
rxd_info_t *rxds;
} rx_block_info_t;
/* pre declaration of the nic structure */
typedef struct s2io_nic nic_t;
/* Ring specific structure */
typedef struct ring_info {
/* The ring number */
int ring_no;
/*
* Place holders for the virtual and physical addresses of
* all the Rx Blocks
*/
rx_block_info_t rx_blocks[MAX_RX_BLOCKS_PER_RING];
int block_count;
int pkt_cnt;
/*
* Put pointer info which indictes which RxD has to be replenished
* with a new buffer.
*/
rx_curr_put_info_t rx_curr_put_info;
/*
* Get pointer info which indictes which is the last RxD that was
* processed by the driver.
*/
rx_curr_get_info_t rx_curr_get_info;
#ifndef CONFIG_S2IO_NAPI
/* Index to the absolute position of the put pointer of Rx ring */
int put_pos;
#endif
/* Buffer Address store. */
buffAdd_t **ba;
nic_t *nic;
} ring_info_t;
/* Fifo specific structure */
typedef struct fifo_info {
/* FIFO number */
int fifo_no;
/* Maximum TxDs per TxDL */
int max_txds;
/* Place holder of all the TX List's Phy and Virt addresses. */
list_info_hold_t *list_info;
/*
* Current offset within the tx FIFO where driver would write
* new Tx frame
*/
tx_curr_put_info_t tx_curr_put_info;
/*
* Current offset within tx FIFO from where the driver would start freeing
* the buffers
*/
tx_curr_get_info_t tx_curr_get_info;
nic_t *nic;
}fifo_info_t;
/* Information related to the Tx and Rx FIFOs and Rings of Xena
* is maintained in this structure.
*/
typedef struct mac_info {
/* tx side stuff */
/* logical pointer of start of each Tx FIFO */
TxFIFO_element_t __iomem *tx_FIFO_start[MAX_TX_FIFOS];
/* Fifo specific structure */
fifo_info_t fifos[MAX_TX_FIFOS];
/* Save virtual address of TxD page with zero DMA addr(if any) */
void *zerodma_virt_addr;
/* rx side stuff */
/* Ring specific structure */
ring_info_t rings[MAX_RX_RINGS];
u16 rmac_pause_time;
u16 mc_pause_threshold_q0q3;
u16 mc_pause_threshold_q4q7;
void *stats_mem; /* orignal pointer to allocated mem */
dma_addr_t stats_mem_phy; /* Physical address of the stat block */
u32 stats_mem_sz;
StatInfo_t *stats_info; /* Logical address of the stat block */
} mac_info_t;
/* structure representing the user defined MAC addresses */
typedef struct {
char addr[ETH_ALEN];
int usage_cnt;
} usr_addr_t;
/* Default Tunable parameters of the NIC. */
#define DEFAULT_FIFO_0_LEN 4096
#define DEFAULT_FIFO_1_7_LEN 512
#define SMALL_BLK_CNT 30
#define LARGE_BLK_CNT 100
/*
* Structure to keep track of the MSI-X vectors and the corresponding
* argument registered against each vector
*/
#define MAX_REQUESTED_MSI_X 17
struct s2io_msix_entry
{
u16 vector;
u16 entry;
void *arg;
u8 type;
#define MSIX_FIFO_TYPE 1
#define MSIX_RING_TYPE 2
u8 in_use;
#define MSIX_REGISTERED_SUCCESS 0xAA
};
struct msix_info_st {
u64 addr;
u64 data;
};
[PATCH] S2io: Large Receive Offload (LRO) feature(v2) for Neterion (s2io) 10GbE Xframe PCI-X and PCI-E NICs Hi, Below is a patch for the Large Receive Offload feature. Please review and let us know your comments. LRO algorithm was described in an OLS 2005 presentation, located at ftp.s2io.com user: linuxdocs password: HALdocs The same ftp site has Programming Manual for Xframe-I ASIC. LRO feature is supported on Neterion Xframe-I, Xframe-II and Xframe-Express 10GbE NICs. Brief description: The Large Receive Offload(LRO) feature is a stateless offload that is complementary to TSO feature but on the receive path. The idea is to combine and collapse(upto 64K maximum) in the driver, in-sequence TCP packets belonging to the same session. It is mainly designed to improve 1500 mtu receive performance, since Jumbo frame performance is already close to 10GbE line rate. Some performance numbers are attached below. Implementation details: 1. Handle packet chains from multiple sessions(current default MAX_LRO_SESSSIONS=32). 2. Examine each packet for eligiblity to aggregate. A packet is considered eligible if it meets all the below criteria. a. It is a TCP/IP packet and L2 type is not LLC or SNAP. b. The packet has no checksum errors(L3 and L4). c. There are no IP options. The only TCP option supported is timestamps. d. Search and locate the LRO object corresponding to this socket and ensure packet is in TCP sequence. e. It's not a special packet(SYN, FIN, RST, URG, PSH etc. flags are not set). f. TCP payload is non-zero(It's not a pure ACK). g. It's not an IP-fragmented packet. 3. If a packet is found eligible, the LRO object is updated with information such as next sequence number expected, current length of aggregated packet and so on. If not eligible or max packets reached, update IP and TCP headers of first packet in the chain and pass it up to stack. 4. The frag_list in skb structure is used to chain packets into one large packet. Kernel changes required: None Performance results: Main focus of the initial testing was on 1500 mtu receiver, since this is a bottleneck not covered by the existing stateless offloads. There are couple disclaimers about the performance results below: 1. Your mileage will vary!!!! We initially concentrated on couple pci-x 2.0 platforms that are powerful enough to push 10 GbE NIC and do not have bottlenecks other than cpu%; testing on other platforms is still in progress. On some lower end systems we are seeing lower gains. 2. Current LRO implementation is still (for the most part) software based, and therefore performance potential of the feature is far from being realized. Full hw implementation of LRO is expected in the next version of Xframe ASIC. Performance delta(with MTU=1500) going from LRO disabled to enabled: IBM 2-way Xeon (x366) : 3.5 to 7.1 Gbps 2-way Opteron : 4.5 to 6.1 Gbps Signed-off-by: Ravinandan Arakali <ravinandan.arakali@neterion.com> Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
2006-01-26 03:53:07 +08:00
/* Data structure to represent a LRO session */
typedef struct lro {
struct sk_buff *parent;
struct sk_buff *last_frag;
[PATCH] S2io: Large Receive Offload (LRO) feature(v2) for Neterion (s2io) 10GbE Xframe PCI-X and PCI-E NICs Hi, Below is a patch for the Large Receive Offload feature. Please review and let us know your comments. LRO algorithm was described in an OLS 2005 presentation, located at ftp.s2io.com user: linuxdocs password: HALdocs The same ftp site has Programming Manual for Xframe-I ASIC. LRO feature is supported on Neterion Xframe-I, Xframe-II and Xframe-Express 10GbE NICs. Brief description: The Large Receive Offload(LRO) feature is a stateless offload that is complementary to TSO feature but on the receive path. The idea is to combine and collapse(upto 64K maximum) in the driver, in-sequence TCP packets belonging to the same session. It is mainly designed to improve 1500 mtu receive performance, since Jumbo frame performance is already close to 10GbE line rate. Some performance numbers are attached below. Implementation details: 1. Handle packet chains from multiple sessions(current default MAX_LRO_SESSSIONS=32). 2. Examine each packet for eligiblity to aggregate. A packet is considered eligible if it meets all the below criteria. a. It is a TCP/IP packet and L2 type is not LLC or SNAP. b. The packet has no checksum errors(L3 and L4). c. There are no IP options. The only TCP option supported is timestamps. d. Search and locate the LRO object corresponding to this socket and ensure packet is in TCP sequence. e. It's not a special packet(SYN, FIN, RST, URG, PSH etc. flags are not set). f. TCP payload is non-zero(It's not a pure ACK). g. It's not an IP-fragmented packet. 3. If a packet is found eligible, the LRO object is updated with information such as next sequence number expected, current length of aggregated packet and so on. If not eligible or max packets reached, update IP and TCP headers of first packet in the chain and pass it up to stack. 4. The frag_list in skb structure is used to chain packets into one large packet. Kernel changes required: None Performance results: Main focus of the initial testing was on 1500 mtu receiver, since this is a bottleneck not covered by the existing stateless offloads. There are couple disclaimers about the performance results below: 1. Your mileage will vary!!!! We initially concentrated on couple pci-x 2.0 platforms that are powerful enough to push 10 GbE NIC and do not have bottlenecks other than cpu%; testing on other platforms is still in progress. On some lower end systems we are seeing lower gains. 2. Current LRO implementation is still (for the most part) software based, and therefore performance potential of the feature is far from being realized. Full hw implementation of LRO is expected in the next version of Xframe ASIC. Performance delta(with MTU=1500) going from LRO disabled to enabled: IBM 2-way Xeon (x366) : 3.5 to 7.1 Gbps 2-way Opteron : 4.5 to 6.1 Gbps Signed-off-by: Ravinandan Arakali <ravinandan.arakali@neterion.com> Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
2006-01-26 03:53:07 +08:00
u8 *l2h;
struct iphdr *iph;
struct tcphdr *tcph;
u32 tcp_next_seq;
u32 tcp_ack;
int total_len;
int frags_len;
int sg_num;
int in_use;
u16 window;
u32 cur_tsval;
u32 cur_tsecr;
u8 saw_ts;
}lro_t;
/* Structure representing one instance of the NIC */
struct s2io_nic {
int rxd_mode;
#ifdef CONFIG_S2IO_NAPI
/*
* Count of packets to be processed in a given iteration, it will be indicated
* by the quota field of the device structure when NAPI is enabled.
*/
int pkts_to_process;
#endif
struct net_device *dev;
mac_info_t mac_control;
struct config_param config;
struct pci_dev *pdev;
void __iomem *bar0;
void __iomem *bar1;
#define MAX_MAC_SUPPORTED 16
#define MAX_SUPPORTED_MULTICASTS MAX_MAC_SUPPORTED
macaddr_t def_mac_addr[MAX_MAC_SUPPORTED];
macaddr_t pre_mac_addr[MAX_MAC_SUPPORTED];
struct net_device_stats stats;
int high_dma_flag;
int device_close_flag;
int device_enabled_once;
char name[60];
struct tasklet_struct task;
volatile unsigned long tasklet_status;
/* Timer that handles I/O errors/exceptions */
struct timer_list alarm_timer;
/* Space to back up the PCI config space */
u32 config_space[256 / sizeof(u32)];
atomic_t rx_bufs_left[MAX_RX_RINGS];
spinlock_t tx_lock;
#ifndef CONFIG_S2IO_NAPI
spinlock_t put_lock;
#endif
#define PROMISC 1
#define ALL_MULTI 2
#define MAX_ADDRS_SUPPORTED 64
u16 usr_addr_count;
u16 mc_addr_count;
usr_addr_t usr_addrs[MAX_ADDRS_SUPPORTED];
u16 m_cast_flg;
u16 all_multi_pos;
u16 promisc_flg;
u16 tx_pkt_count;
u16 rx_pkt_count;
u16 tx_err_count;
u16 rx_err_count;
/* Id timer, used to blink NIC to physically identify NIC. */
struct timer_list id_timer;
/* Restart timer, used to restart NIC if the device is stuck and
* a schedule task that will set the correct Link state once the
* NIC's PHY has stabilized after a state change.
*/
struct work_struct rst_timer_task;
struct work_struct set_link_task;
/* Flag that can be used to turn on or turn off the Rx checksum
* offload feature.
*/
int rx_csum;
/* after blink, the adapter must be restored with original
* values.
*/
u64 adapt_ctrl_org;
/* Last known link state. */
u16 last_link_state;
#define LINK_DOWN 1
#define LINK_UP 2
int task_flag;
#define CARD_DOWN 1
#define CARD_UP 2
atomic_t card_state;
volatile unsigned long link_state;
struct vlan_group *vlgrp;
#define MSIX_FLG 0xA5
struct msix_entry *entries;
struct s2io_msix_entry *s2io_entries;
char desc[MAX_REQUESTED_MSI_X][25];
int avail_msix_vectors; /* No. of MSI-X vectors granted by system */
struct msix_info_st msix_info[0x3f];
#define XFRAME_I_DEVICE 1
#define XFRAME_II_DEVICE 2
u8 device_type;
[PATCH] S2io: Large Receive Offload (LRO) feature(v2) for Neterion (s2io) 10GbE Xframe PCI-X and PCI-E NICs Hi, Below is a patch for the Large Receive Offload feature. Please review and let us know your comments. LRO algorithm was described in an OLS 2005 presentation, located at ftp.s2io.com user: linuxdocs password: HALdocs The same ftp site has Programming Manual for Xframe-I ASIC. LRO feature is supported on Neterion Xframe-I, Xframe-II and Xframe-Express 10GbE NICs. Brief description: The Large Receive Offload(LRO) feature is a stateless offload that is complementary to TSO feature but on the receive path. The idea is to combine and collapse(upto 64K maximum) in the driver, in-sequence TCP packets belonging to the same session. It is mainly designed to improve 1500 mtu receive performance, since Jumbo frame performance is already close to 10GbE line rate. Some performance numbers are attached below. Implementation details: 1. Handle packet chains from multiple sessions(current default MAX_LRO_SESSSIONS=32). 2. Examine each packet for eligiblity to aggregate. A packet is considered eligible if it meets all the below criteria. a. It is a TCP/IP packet and L2 type is not LLC or SNAP. b. The packet has no checksum errors(L3 and L4). c. There are no IP options. The only TCP option supported is timestamps. d. Search and locate the LRO object corresponding to this socket and ensure packet is in TCP sequence. e. It's not a special packet(SYN, FIN, RST, URG, PSH etc. flags are not set). f. TCP payload is non-zero(It's not a pure ACK). g. It's not an IP-fragmented packet. 3. If a packet is found eligible, the LRO object is updated with information such as next sequence number expected, current length of aggregated packet and so on. If not eligible or max packets reached, update IP and TCP headers of first packet in the chain and pass it up to stack. 4. The frag_list in skb structure is used to chain packets into one large packet. Kernel changes required: None Performance results: Main focus of the initial testing was on 1500 mtu receiver, since this is a bottleneck not covered by the existing stateless offloads. There are couple disclaimers about the performance results below: 1. Your mileage will vary!!!! We initially concentrated on couple pci-x 2.0 platforms that are powerful enough to push 10 GbE NIC and do not have bottlenecks other than cpu%; testing on other platforms is still in progress. On some lower end systems we are seeing lower gains. 2. Current LRO implementation is still (for the most part) software based, and therefore performance potential of the feature is far from being realized. Full hw implementation of LRO is expected in the next version of Xframe ASIC. Performance delta(with MTU=1500) going from LRO disabled to enabled: IBM 2-way Xeon (x366) : 3.5 to 7.1 Gbps 2-way Opteron : 4.5 to 6.1 Gbps Signed-off-by: Ravinandan Arakali <ravinandan.arakali@neterion.com> Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
2006-01-26 03:53:07 +08:00
#define MAX_LRO_SESSIONS 32
lro_t lro0_n[MAX_LRO_SESSIONS];
unsigned long clubbed_frms_cnt;
unsigned long sending_both;
u8 lro;
u16 lro_max_aggr_per_sess;
#define INTA 0
#define MSI 1
#define MSI_X 2
u8 intr_type;
spinlock_t rx_lock;
atomic_t isr_cnt;
u64 *ufo_in_band_v;
#define VPD_PRODUCT_NAME_LEN 50
u8 product_name[VPD_PRODUCT_NAME_LEN];
};
#define RESET_ERROR 1;
#define CMD_ERROR 2;
/* OS related system calls */
#ifndef readq
static inline u64 readq(void __iomem *addr)
{
u64 ret = 0;
ret = readl(addr + 4);
ret <<= 32;
ret |= readl(addr);
return ret;
}
#endif
#ifndef writeq
static inline void writeq(u64 val, void __iomem *addr)
{
writel((u32) (val), addr);
writel((u32) (val >> 32), (addr + 4));
}
#endif
/*
* Some registers have to be written in a particular order to
* expect correct hardware operation. The macro SPECIAL_REG_WRITE
* is used to perform such ordered writes. Defines UF (Upper First)
* and LF (Lower First) will be used to specify the required write order.
*/
#define UF 1
#define LF 2
static inline void SPECIAL_REG_WRITE(u64 val, void __iomem *addr, int order)
{
u32 ret;
if (order == LF) {
writel((u32) (val), addr);
ret = readl(addr);
writel((u32) (val >> 32), (addr + 4));
ret = readl(addr + 4);
} else {
writel((u32) (val >> 32), (addr + 4));
ret = readl(addr + 4);
writel((u32) (val), addr);
ret = readl(addr);
}
}
/* Interrupt related values of Xena */
#define ENABLE_INTRS 1
#define DISABLE_INTRS 2
/* Highest level interrupt blocks */
#define TX_PIC_INTR (0x0001<<0)
#define TX_DMA_INTR (0x0001<<1)
#define TX_MAC_INTR (0x0001<<2)
#define TX_XGXS_INTR (0x0001<<3)
#define TX_TRAFFIC_INTR (0x0001<<4)
#define RX_PIC_INTR (0x0001<<5)
#define RX_DMA_INTR (0x0001<<6)
#define RX_MAC_INTR (0x0001<<7)
#define RX_XGXS_INTR (0x0001<<8)
#define RX_TRAFFIC_INTR (0x0001<<9)
#define MC_INTR (0x0001<<10)
#define ENA_ALL_INTRS ( TX_PIC_INTR | \
TX_DMA_INTR | \
TX_MAC_INTR | \
TX_XGXS_INTR | \
TX_TRAFFIC_INTR | \
RX_PIC_INTR | \
RX_DMA_INTR | \
RX_MAC_INTR | \
RX_XGXS_INTR | \
RX_TRAFFIC_INTR | \
MC_INTR )
/* Interrupt masks for the general interrupt mask register */
#define DISABLE_ALL_INTRS 0xFFFFFFFFFFFFFFFFULL
#define TXPIC_INT_M BIT(0)
#define TXDMA_INT_M BIT(1)
#define TXMAC_INT_M BIT(2)
#define TXXGXS_INT_M BIT(3)
#define TXTRAFFIC_INT_M BIT(8)
#define PIC_RX_INT_M BIT(32)
#define RXDMA_INT_M BIT(33)
#define RXMAC_INT_M BIT(34)
#define MC_INT_M BIT(35)
#define RXXGXS_INT_M BIT(36)
#define RXTRAFFIC_INT_M BIT(40)
/* PIC level Interrupts TODO*/
/* DMA level Inressupts */
#define TXDMA_PFC_INT_M BIT(0)
#define TXDMA_PCC_INT_M BIT(2)
/* PFC block interrupts */
#define PFC_MISC_ERR_1 BIT(0) /* Interrupt to indicate FIFO full */
/* PCC block interrupts. */
#define PCC_FB_ECC_ERR vBIT(0xff, 16, 8) /* Interrupt to indicate
PCC_FB_ECC Error. */
#define RXD_GET_VLAN_TAG(Control_2) (u16)(Control_2 & MASK_VLAN_TAG)
/*
* Prototype declaration.
*/
static int __devinit s2io_init_nic(struct pci_dev *pdev,
const struct pci_device_id *pre);
static void __devexit s2io_rem_nic(struct pci_dev *pdev);
static int init_shared_mem(struct s2io_nic *sp);
static void free_shared_mem(struct s2io_nic *sp);
static int init_nic(struct s2io_nic *nic);
static void rx_intr_handler(ring_info_t *ring_data);
static void tx_intr_handler(fifo_info_t *fifo_data);
static void alarm_intr_handler(struct s2io_nic *sp);
static int s2io_starter(void);
static void s2io_tx_watchdog(struct net_device *dev);
static void s2io_tasklet(unsigned long dev_addr);
static void s2io_set_multicast(struct net_device *dev);
static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp);
static void s2io_link(nic_t * sp, int link);
#if defined(CONFIG_S2IO_NAPI)
static int s2io_poll(struct net_device *dev, int *budget);
#endif
static void s2io_init_pci(nic_t * sp);
static int s2io_set_mac_addr(struct net_device *dev, u8 * addr);
static void s2io_alarm_handle(unsigned long data);
static int s2io_enable_msi(nic_t *nic);
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers Maintain a per-CPU global "struct pt_regs *" variable which can be used instead of passing regs around manually through all ~1800 interrupt handlers in the Linux kernel. The regs pointer is used in few places, but it potentially costs both stack space and code to pass it around. On the FRV arch, removing the regs parameter from all the genirq function results in a 20% speed up of the IRQ exit path (ie: from leaving timer_interrupt() to leaving do_IRQ()). Where appropriate, an arch may override the generic storage facility and do something different with the variable. On FRV, for instance, the address is maintained in GR28 at all times inside the kernel as part of general exception handling. Having looked over the code, it appears that the parameter may be handed down through up to twenty or so layers of functions. Consider a USB character device attached to a USB hub, attached to a USB controller that posts its interrupts through a cascaded auxiliary interrupt controller. A character device driver may want to pass regs to the sysrq handler through the input layer which adds another few layers of parameter passing. I've build this code with allyesconfig for x86_64 and i386. I've runtested the main part of the code on FRV and i386, though I can't test most of the drivers. I've also done partial conversion for powerpc and MIPS - these at least compile with minimal configurations. This will affect all archs. Mostly the changes should be relatively easy. Take do_IRQ(), store the regs pointer at the beginning, saving the old one: struct pt_regs *old_regs = set_irq_regs(regs); And put the old one back at the end: set_irq_regs(old_regs); Don't pass regs through to generic_handle_irq() or __do_IRQ(). In timer_interrupt(), this sort of change will be necessary: - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING); I'd like to move update_process_times()'s use of get_irq_regs() into itself, except that i386, alone of the archs, uses something other than user_mode(). Some notes on the interrupt handling in the drivers: (*) input_dev() is now gone entirely. The regs pointer is no longer stored in the input_dev struct. (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does something different depending on whether it's been supplied with a regs pointer or not. (*) Various IRQ handler function pointers have been moved to type irq_handler_t. Signed-Off-By: David Howells <dhowells@redhat.com> (cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 21:55:46 +08:00
static irqreturn_t s2io_msi_handle(int irq, void *dev_id);
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers Maintain a per-CPU global "struct pt_regs *" variable which can be used instead of passing regs around manually through all ~1800 interrupt handlers in the Linux kernel. The regs pointer is used in few places, but it potentially costs both stack space and code to pass it around. On the FRV arch, removing the regs parameter from all the genirq function results in a 20% speed up of the IRQ exit path (ie: from leaving timer_interrupt() to leaving do_IRQ()). Where appropriate, an arch may override the generic storage facility and do something different with the variable. On FRV, for instance, the address is maintained in GR28 at all times inside the kernel as part of general exception handling. Having looked over the code, it appears that the parameter may be handed down through up to twenty or so layers of functions. Consider a USB character device attached to a USB hub, attached to a USB controller that posts its interrupts through a cascaded auxiliary interrupt controller. A character device driver may want to pass regs to the sysrq handler through the input layer which adds another few layers of parameter passing. I've build this code with allyesconfig for x86_64 and i386. I've runtested the main part of the code on FRV and i386, though I can't test most of the drivers. I've also done partial conversion for powerpc and MIPS - these at least compile with minimal configurations. This will affect all archs. Mostly the changes should be relatively easy. Take do_IRQ(), store the regs pointer at the beginning, saving the old one: struct pt_regs *old_regs = set_irq_regs(regs); And put the old one back at the end: set_irq_regs(old_regs); Don't pass regs through to generic_handle_irq() or __do_IRQ(). In timer_interrupt(), this sort of change will be necessary: - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING); I'd like to move update_process_times()'s use of get_irq_regs() into itself, except that i386, alone of the archs, uses something other than user_mode(). Some notes on the interrupt handling in the drivers: (*) input_dev() is now gone entirely. The regs pointer is no longer stored in the input_dev struct. (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does something different depending on whether it's been supplied with a regs pointer or not. (*) Various IRQ handler function pointers have been moved to type irq_handler_t. Signed-Off-By: David Howells <dhowells@redhat.com> (cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 21:55:46 +08:00
s2io_msix_ring_handle(int irq, void *dev_id);
static irqreturn_t
IRQ: Maintain regs pointer globally rather than passing to IRQ handlers Maintain a per-CPU global "struct pt_regs *" variable which can be used instead of passing regs around manually through all ~1800 interrupt handlers in the Linux kernel. The regs pointer is used in few places, but it potentially costs both stack space and code to pass it around. On the FRV arch, removing the regs parameter from all the genirq function results in a 20% speed up of the IRQ exit path (ie: from leaving timer_interrupt() to leaving do_IRQ()). Where appropriate, an arch may override the generic storage facility and do something different with the variable. On FRV, for instance, the address is maintained in GR28 at all times inside the kernel as part of general exception handling. Having looked over the code, it appears that the parameter may be handed down through up to twenty or so layers of functions. Consider a USB character device attached to a USB hub, attached to a USB controller that posts its interrupts through a cascaded auxiliary interrupt controller. A character device driver may want to pass regs to the sysrq handler through the input layer which adds another few layers of parameter passing. I've build this code with allyesconfig for x86_64 and i386. I've runtested the main part of the code on FRV and i386, though I can't test most of the drivers. I've also done partial conversion for powerpc and MIPS - these at least compile with minimal configurations. This will affect all archs. Mostly the changes should be relatively easy. Take do_IRQ(), store the regs pointer at the beginning, saving the old one: struct pt_regs *old_regs = set_irq_regs(regs); And put the old one back at the end: set_irq_regs(old_regs); Don't pass regs through to generic_handle_irq() or __do_IRQ(). In timer_interrupt(), this sort of change will be necessary: - update_process_times(user_mode(regs)); - profile_tick(CPU_PROFILING, regs); + update_process_times(user_mode(get_irq_regs())); + profile_tick(CPU_PROFILING); I'd like to move update_process_times()'s use of get_irq_regs() into itself, except that i386, alone of the archs, uses something other than user_mode(). Some notes on the interrupt handling in the drivers: (*) input_dev() is now gone entirely. The regs pointer is no longer stored in the input_dev struct. (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking. It does something different depending on whether it's been supplied with a regs pointer or not. (*) Various IRQ handler function pointers have been moved to type irq_handler_t. Signed-Off-By: David Howells <dhowells@redhat.com> (cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 21:55:46 +08:00
s2io_msix_fifo_handle(int irq, void *dev_id);
static irqreturn_t s2io_isr(int irq, void *dev_id);
static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag);
static const struct ethtool_ops netdev_ethtool_ops;
static void s2io_set_link(unsigned long data);
static int s2io_set_swapper(nic_t * sp);
static void s2io_card_down(nic_t *nic);
static int s2io_card_up(nic_t *nic);
static int get_xena_rev_id(struct pci_dev *pdev);
static void restore_xmsi_data(nic_t *nic);
[PATCH] S2io: Large Receive Offload (LRO) feature(v2) for Neterion (s2io) 10GbE Xframe PCI-X and PCI-E NICs Hi, Below is a patch for the Large Receive Offload feature. Please review and let us know your comments. LRO algorithm was described in an OLS 2005 presentation, located at ftp.s2io.com user: linuxdocs password: HALdocs The same ftp site has Programming Manual for Xframe-I ASIC. LRO feature is supported on Neterion Xframe-I, Xframe-II and Xframe-Express 10GbE NICs. Brief description: The Large Receive Offload(LRO) feature is a stateless offload that is complementary to TSO feature but on the receive path. The idea is to combine and collapse(upto 64K maximum) in the driver, in-sequence TCP packets belonging to the same session. It is mainly designed to improve 1500 mtu receive performance, since Jumbo frame performance is already close to 10GbE line rate. Some performance numbers are attached below. Implementation details: 1. Handle packet chains from multiple sessions(current default MAX_LRO_SESSSIONS=32). 2. Examine each packet for eligiblity to aggregate. A packet is considered eligible if it meets all the below criteria. a. It is a TCP/IP packet and L2 type is not LLC or SNAP. b. The packet has no checksum errors(L3 and L4). c. There are no IP options. The only TCP option supported is timestamps. d. Search and locate the LRO object corresponding to this socket and ensure packet is in TCP sequence. e. It's not a special packet(SYN, FIN, RST, URG, PSH etc. flags are not set). f. TCP payload is non-zero(It's not a pure ACK). g. It's not an IP-fragmented packet. 3. If a packet is found eligible, the LRO object is updated with information such as next sequence number expected, current length of aggregated packet and so on. If not eligible or max packets reached, update IP and TCP headers of first packet in the chain and pass it up to stack. 4. The frag_list in skb structure is used to chain packets into one large packet. Kernel changes required: None Performance results: Main focus of the initial testing was on 1500 mtu receiver, since this is a bottleneck not covered by the existing stateless offloads. There are couple disclaimers about the performance results below: 1. Your mileage will vary!!!! We initially concentrated on couple pci-x 2.0 platforms that are powerful enough to push 10 GbE NIC and do not have bottlenecks other than cpu%; testing on other platforms is still in progress. On some lower end systems we are seeing lower gains. 2. Current LRO implementation is still (for the most part) software based, and therefore performance potential of the feature is far from being realized. Full hw implementation of LRO is expected in the next version of Xframe ASIC. Performance delta(with MTU=1500) going from LRO disabled to enabled: IBM 2-way Xeon (x366) : 3.5 to 7.1 Gbps 2-way Opteron : 4.5 to 6.1 Gbps Signed-off-by: Ravinandan Arakali <ravinandan.arakali@neterion.com> Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
2006-01-26 03:53:07 +08:00
static int s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, RxD_t *rxdp, nic_t *sp);
static void clear_lro_session(lro_t *lro);
static void queue_rx_frame(struct sk_buff *skb);
static void update_L3L4_header(nic_t *sp, lro_t *lro);
static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, u32 tcp_len);
#define s2io_tcp_mss(skb) skb_shinfo(skb)->gso_size
#define s2io_udp_mss(skb) skb_shinfo(skb)->gso_size
#define s2io_offload_type(skb) skb_shinfo(skb)->gso_type
#define S2IO_PARM_INT(X, def_val) \
static unsigned int X = def_val;\
module_param(X , uint, 0);
#endif /* _S2IO_H */