From 49397b801261160fb5f5d3f28536c792e72ecbb3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Mar 2020 16:23:05 +0300 Subject: [PATCH 01/16] net/mlx5e: Fix actions_match_supported() return The actions_match_supported() function returns a bool, true for success and false for failure. This error path is returning a negative which is cast to true but it should return false. Fixes: 4c3844d9e97e ("net/mlx5e: CT: Introduce connection tracking") Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 901f88a886c8..c7ed468db3e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3057,7 +3057,7 @@ static bool actions_match_supported(struct mlx5e_priv *priv, */ NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct"); - return -EOPNOTSUPP; + return false; } } else { actions = flow->nic_attr->action; From 046826c878bd9c6d0db2de2787ac5dc354b467a2 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 18 Mar 2020 17:05:43 +0800 Subject: [PATCH 02/16] net/mlx5e: remove duplicated check chain_index in mlx5e_rep_setup_ft_cb The function mlx5e_rep_setup_ft_cb check chain_index is zero twice. Signed-off-by: wenxu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a33d15156ed5..d7fa89276ea3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1246,8 +1246,7 @@ static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, case TC_SETUP_CLSFLOWER: memcpy(&tmp, f, sizeof(*f)); - if (!mlx5_esw_chains_prios_supported(esw) || - tmp.common.chain_index) + if (!mlx5_esw_chains_prios_supported(esw)) return -EOPNOTSUPP; /* Re-use tc offload path by moving the ft flow to the From 60acc105cbc23c525ddb6fed595cac4796c0040b Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 18 Mar 2020 10:55:12 +0200 Subject: [PATCH 03/16] net/mlx5: E-Switch, Enable restore table only if reg_c1 is supported Reg c0/c1 matching, rewrite of regs c0/c1, and copy header of regs c1,B is needed for the restore table to function, might not be supported by firmware, and creation of the restore table or the copy header will fail. Check reg_c1 loopback support, as firmware which supports this, should have all of the above. Fixes: 11b717d61526 ("net/mlx5: E-Switch, Get reg_c0 value on CQE") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 0b4b43ebae9a..cba95890f173 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1069,6 +1069,9 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) struct mlx5_flow_spec *spec; void *misc; + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return ERR_PTR(-EOPNOTSUPP); + spec = kzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM); @@ -1477,6 +1480,9 @@ static void esw_destroy_restore_table(struct mlx5_eswitch *esw) { struct mlx5_esw_offload *offloads = &esw->offloads; + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return; + mlx5_modify_header_dealloc(esw->dev, offloads->restore_copy_hdr_id); mlx5_destroy_flow_group(offloads->restore_group); mlx5_destroy_flow_table(offloads->ft_offloads_restore); @@ -1496,6 +1502,9 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) u32 *flow_group_in; int err = 0; + if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) + return 0; + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); if (!ns) { esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); From 7983a675ba65c5f8dae7532dcd91a40adc237da8 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 18 Mar 2020 11:43:06 +0200 Subject: [PATCH 04/16] net/mlx5: E-Switch, Enable chains only if regs loopback is enabled Register c0 loopback is needed to fully support chains and prios. Enable chains and prio only if loopback (of reg c1 which came together with c0), is enabled. To be able to check that, move enabling of loopback before eswitch chains init. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 11 +++--- .../mlx5/core/eswitch_offloads_chains.c | 35 +++++++++++-------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index cba95890f173..ca6ac3876a1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2351,14 +2351,15 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); - err = esw_offloads_steering_init(esw); - if (err) - goto err_steering_init; err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; + err = esw_offloads_steering_init(esw); + if (err) + goto err_steering_init; + /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; @@ -2380,9 +2381,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); err_uplink: esw_set_passing_vport_metadata(esw, false); -err_vport_metadata: - esw_offloads_steering_cleanup(esw); err_steering_init: + esw_offloads_steering_cleanup(esw); +err_vport_metadata: mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index 1e275a8441de..090e56c5414d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -280,7 +280,8 @@ create_fdb_chain_restore(struct fdb_chain *fdb_chain) u32 index; int err; - if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw)) + if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || + !mlx5_esw_chains_prios_supported(esw)) return 0; err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); @@ -335,6 +336,18 @@ create_fdb_chain_restore(struct fdb_chain *fdb_chain) return err; } +static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + if (!fdb_chain->miss_modify_hdr) + return; + + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); +} + static struct fdb_chain * mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) { @@ -361,11 +374,7 @@ mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) return fdb_chain; err_insert: - if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { - mlx5_del_flow_rules(fdb_chain->restore_rule); - mlx5_modify_header_dealloc(esw->dev, - fdb_chain->miss_modify_hdr); - } + destroy_fdb_chain_restore(fdb_chain); err_restore: kvfree(fdb_chain); return ERR_PTR(err); @@ -379,14 +388,7 @@ mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, chain_params); - if (fdb_chain->chain != mlx5_esw_chains_get_ft_chain(esw)) { - mlx5_del_flow_rules(fdb_chain->restore_rule); - mlx5_modify_header_dealloc(esw->dev, - fdb_chain->miss_modify_hdr); - - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); - } - + destroy_fdb_chain_restore(fdb_chain); kvfree(fdb_chain); } @@ -423,7 +425,7 @@ mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, dest.ft = next_fdb; if (next_fdb == tc_end_fdb(esw) && - fdb_modify_header_fwd_to_table_supported(esw)) { + mlx5_esw_chains_prios_supported(esw)) { act.modify_hdr = fdb_chain->miss_modify_hdr; act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } @@ -783,6 +785,9 @@ mlx5_esw_chains_init(struct mlx5_eswitch *esw) esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { /* Disabled when ttl workaround is needed, e.g * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig From c8508713c71c21f5a16469dcc75ffb4381fbfeb4 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 19 Mar 2020 17:48:18 +0200 Subject: [PATCH 05/16] net/mlx5: E-Switch, free flow_group_in after creating the restore table We allocate a temporary memory but forget to free it. Fixes: 11b717d61526 ("net/mlx5: E-Switch, Get reg_c0 value on CQE") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ca6ac3876a1f..088fb51123e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1566,6 +1566,8 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) esw->offloads.restore_group = g; esw->offloads.restore_copy_hdr_id = mod_hdr; + kvfree(flow_group_in); + return 0; err_mod_hdr: From d528d4970503edafc23bd43d322a818d74954f7a Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 23 Mar 2020 12:14:58 +0200 Subject: [PATCH 06/16] net/mlx5: E-Switch, Use correct type for chain, prio and level values The correct type is u32. Fixes: d18296ffd9cc ("net/mlx5: E-Switch, Introduce global tables") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c index 090e56c5414d..184cea62254f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c @@ -730,7 +730,8 @@ mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) struct mlx5_flow_table * mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) { - int chain, prio, level, err; + u32 chain, prio, level; + int err; if (!fdb_ignore_flow_level_supported(esw)) { err = -EOPNOTSUPP; From b820ce00e03af3255e1db9cc086caf742ea32809 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 26 Feb 2020 15:03:16 +0200 Subject: [PATCH 07/16] net/mlx5: Simplify matching group searches Instead of using two different structs for searching groups with the same match, use a single struct and thus simplify the code, make it more readable and smaller size which means less code cache misses. text data bss dec hex before: 35524 2744 0 38268 957c after: 35038 2744 0 37782 9396 When testing add 70000 rules, delete all the rules, and repeat three times taking the average, we get (time in seconds): Before the change: insert 16.80, delete 11.02 After the change: insert 16.55, delete 10.95 Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 41 +++++-------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c93bd55fab06..a9ec40ca7893 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1577,28 +1577,19 @@ struct match_list { struct mlx5_flow_group *g; }; -struct match_list_head { - struct list_head list; - struct match_list first; -}; - -static void free_match_list(struct match_list_head *head, bool ft_locked) +static void free_match_list(struct match_list *head, bool ft_locked) { - if (!list_empty(&head->list)) { - struct match_list *iter, *match_tmp; + struct match_list *iter, *match_tmp; - list_del(&head->first.list); - tree_put_node(&head->first.g->node, ft_locked); - list_for_each_entry_safe(iter, match_tmp, &head->list, - list) { - tree_put_node(&iter->g->node, ft_locked); - list_del(&iter->list); - kfree(iter); - } + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node, ft_locked); + list_del(&iter->list); + kfree(iter); } } -static int build_match_list(struct match_list_head *match_head, +static int build_match_list(struct match_list *match_head, struct mlx5_flow_table *ft, const struct mlx5_flow_spec *spec, bool ft_locked) @@ -1615,14 +1606,8 @@ static int build_match_list(struct match_list_head *match_head, rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; - if (likely(list_empty(&match_head->list))) { - if (!tree_get_node(&g->node)) - continue; - match_head->first.g = g; - list_add_tail(&match_head->first.list, - &match_head->list); + if (unlikely(!tree_get_node(&g->node))) continue; - } curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { @@ -1630,10 +1615,6 @@ static int build_match_list(struct match_list_head *match_head, err = -ENOMEM; goto out; } - if (!tree_get_node(&g->node)) { - kfree(curr_match); - continue; - } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } @@ -1785,9 +1766,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_steering *steering = get_steering(&ft->node); - struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; - struct match_list_head match_head; + struct match_list match_head; + struct mlx5_flow_group *g; bool take_write = false; struct fs_fte *fte; int version; From 454401aeb2957e0d996bc9208b78aa4d8ac12964 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 4 Mar 2020 10:32:56 +0200 Subject: [PATCH 08/16] net/mlx5: Fix group version management When adding a rule to a flow group we need increment the version of the group. Current code fails to do that and as a result, when trying to add a rule, we will fail to discover a case where an FTE with the same match value was added while we scanned the groups of the same match criteria, thus we may try to add an FTE that was already added. Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a9ec40ca7893..751dd5869485 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1323,6 +1323,7 @@ add_rule_fte(struct fs_fte *fte, fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; atomic_inc(&fte->node.version); + atomic_inc(&fg->node.version); out: return handle; From 0aad2a0b42596468ad199f4408deb1a167f3ac34 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 4 Mar 2020 10:32:56 +0200 Subject: [PATCH 09/16] net/mlx5: Avoid incrementing FTE version FTE version is not used anywhere in the code so avoid incrementing it. Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 751dd5869485..44ed42e0e1c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1322,7 +1322,6 @@ add_rule_fte(struct fs_fte *fte, fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; - atomic_inc(&fte->node.version); atomic_inc(&fg->node.version); out: From dc638d1122d25d44e480070fc3497e80fda5aff0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 4 Mar 2020 10:32:56 +0200 Subject: [PATCH 10/16] net/mlx5: Avoid group version scan when not necessary Group version is used when modifying a rule is allowed (FLOW_ACT_NO_APPEND is clear) to detect a case where the rule was found but while the groups where unlocked a new FTE was added. In this case, the added FTE could be one with identical match value so we need to attempt again with group lock held. Change the code so version is retrieved only when FLOW_ACT_NO_APPEND is cleared. As result, later compare can also be avoided if FLOW_ACT_NO_APPEND is cleared. Also improve comments text. Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 44ed42e0e1c7..62ce2b9417ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1680,7 +1680,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct match_list *iter; bool take_write = false; struct fs_fte *fte; - u64 version; + u64 version = 0; int err; fte = alloc_fte(ft, spec, flow_act); @@ -1688,10 +1688,12 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, return ERR_PTR(-ENOMEM); search_again_locked: - version = matched_fgs_get_version(match_head); if (flow_act->flags & FLOW_ACT_NO_APPEND) goto skip_search; - /* Try to find a fg that already contains a matching fte */ + version = matched_fgs_get_version(match_head); + /* Try to find an fte with identical match value and attempt update its + * action. + */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1719,10 +1721,12 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, goto out; } - /* Check the fgs version, for case the new FTE with the - * same values was added while the fgs weren't locked + /* Check the fgs version. If version have changed it could be that an + * FTE with the same match value was added while the fgs weren't + * locked. */ - if (version != matched_fgs_get_version(match_head)) { + if (!(flow_act->flags & FLOW_ACT_NO_APPEND) && + version != matched_fgs_get_version(match_head)) { take_write = true; goto search_again_locked; } From ecd01db8711d4c608ef6636275e26a8b2069b798 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 8 Mar 2020 22:19:51 -0500 Subject: [PATCH 11/16] net/mlx5: Simplify mlx5_register_device to return void mlx5_register_device() doesn't check for any error and always returns 0. Simplify mlx5_register_device() to return void and its caller, remove dead code related to it. Reviewed-by: Moshe Shemesh Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 +++----------- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 50862275544e..1972ddd12704 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -193,7 +193,7 @@ bool mlx5_device_registered(struct mlx5_core_dev *dev) return found; } -int mlx5_register_device(struct mlx5_core_dev *dev) +void mlx5_register_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_interface *intf; @@ -203,8 +203,6 @@ int mlx5_register_device(struct mlx5_core_dev *dev) list_for_each_entry(intf, &intf_list, list) mlx5_add_device(intf, priv); mutex_unlock(&mlx5_intf_mutex); - - return 0; } void mlx5_unregister_device(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 204a26bf0a5f..dc58feb5a975 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1211,15 +1211,10 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) goto err_devlink_reg; } - if (mlx5_device_registered(dev)) { + if (mlx5_device_registered(dev)) mlx5_attach_device(dev); - } else { - err = mlx5_register_device(dev); - if (err) { - mlx5_core_err(dev, "register device failed %d\n", err); - goto err_reg_dev; - } - } + else + mlx5_register_device(dev); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1227,9 +1222,6 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) return err; -err_reg_dev: - if (boot) - mlx5_devlink_unregister(priv_to_devlink(dev)); err_devlink_reg: mlx5_unload(dev); err_load: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index da67b28d6e23..8c12f1be27ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -182,7 +182,7 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev); bool mlx5_device_registered(struct mlx5_core_dev *dev); -int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); From f999b706b7ab5dae45a3ee5c2b3bc2b47c11b0c5 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 8 Mar 2020 23:17:37 -0500 Subject: [PATCH 12/16] net/mlx5: Simplify mlx5_unload_one() and its callers mlx5_unload_one() always returns 0. Simplify callers of mlx5_unload_one() and remove the dead code. Reviewed-by: Moshe Shemesh Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 10 ++-------- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index b7bb81b8c49b..bdeb291f6b67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -90,7 +90,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, { struct mlx5_core_dev *dev = devlink_priv(devlink); - return mlx5_unload_one(dev, false); + mlx5_unload_one(dev, false); + return 0; } static int mlx5_devlink_reload_up(struct devlink *devlink, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index dc58feb5a975..4a9d9cae8628 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1235,7 +1235,7 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) return err; } -int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) +void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { if (cleanup) { mlx5_unregister_device(dev); @@ -1264,7 +1264,6 @@ int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) mlx5_function_teardown(dev, cleanup); out: mutex_unlock(&dev->intf_state_mutex); - return 0; } static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) @@ -1385,12 +1384,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); - if (mlx5_unload_one(dev, true)) { - mlx5_core_err(dev, "mlx5_unload_one failed\n"); - mlx5_health_flush(dev); - return; - } - + mlx5_unload_one(dev, true); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); mlx5_devlink_free(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 8c12f1be27ce..a8fb43a85d1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -244,6 +244,6 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); -int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); #endif /* __MLX5_CORE_H__ */ From 98fed6eb9b17d4edb1d57b5f51c63b77686aaf1d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 23 Feb 2020 19:06:56 -0600 Subject: [PATCH 13/16] devlink: Rely on driver eswitch thread safety instead of devlink devlink_nl_cmd_eswitch_set_doit() doesn't hold devlink->lock mutex while invoking driver callback. This is likely due to eswitch mode setting involves adding/remove devlink ports, health reporters or other devlink objects for a devlink device. So it is driver responsiblity to ensure thread safe eswitch state transition happening via either sriov legacy enablement or via devlink eswitch set callback. Therefore, get() callback should also be invoked without holding devlink->lock mutex. Vendor driver can use same internal lock which it uses during eswitch mode set() callback. This makes get() and set() implimentation symmetric in devlink core and in vendor drivers. Hence, remove holding devlink->lock mutex during eswitch get() callback. Failing to do so results into below deadlock scenario when mlx5_core driver is improved to handle eswitch mode set critical section invoked by devlink and sriov sysfs interface in subsequent patch. devlink_nl_cmd_eswitch_set_doit() mlx5_eswitch_mode_set() mutex_lock(esw->mode_lock) <- Lock A [...] register_devlink_port() mutex_lock(&devlink->lock); <- lock B mutex_lock(&devlink->lock); <- lock B devlink_nl_cmd_eswitch_get_doit() mlx5_eswitch_mode_get() mutex_lock(esw->mode_lock) <- Lock A In subsequent patch, mlx5_core driver uses its internal lock during get() and set() eswitch callbacks. Other drivers have been inspected which returns either constant during get operations or reads the value from already allocated structure. Hence it is safe to remove the lock in get( ) callback and let vendor driver handle it. Reviewed-by: Jiri Pirko Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- net/core/devlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 73bb8fbe3393..a9036af7e002 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6187,7 +6187,8 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_eswitch_get_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, From ae24432cbc2b2d7f5a8d636194422604b0b4c4f7 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 14 Dec 2019 04:09:04 -0600 Subject: [PATCH 14/16] net/mlx5: Split eswitch mode check to different helper function In order to check eswitch state under a lock, prepare code to split capability check and eswitch state check into two helper functions. Reviewed-by: Roi Dayan Reviewed-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 088fb51123e2..53fcb00ddbac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2506,13 +2506,18 @@ static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && - !mlx5_core_is_ecpf_esw_manager(dev)) - return -EOPNOTSUPP; - return 0; } +static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw) +{ + /* devlink commands in NONE eswitch mode are currently supported only + * on ECPF. + */ + return (esw->mode == MLX5_ESWITCH_NONE && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -2524,6 +2529,10 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (err) return err; + err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); + if (err) + return err; + cur_mlx5_mode = dev->priv.eswitch->mode; if (esw_mode_from_devlink(mode, &mlx5_mode)) @@ -2549,6 +2558,10 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (err) return err; + err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); + if (err) + return err; + return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); } @@ -2564,6 +2577,10 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) return err; + err = eswitch_devlink_esw_mode_check(esw); + if (err) + return err; + switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) @@ -2618,6 +2635,10 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (err) return err; + err = eswitch_devlink_esw_mode_check(esw); + if (err) + return err; + return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } @@ -2633,6 +2654,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (err) return err; + err = eswitch_devlink_esw_mode_check(esw); + if (err) + return err; + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) @@ -2682,6 +2707,10 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (err) return err; + err = eswitch_devlink_esw_mode_check(esw); + if (err) + return err; + *encap = esw->offloads.encap; return 0; } From ebf77bb83f635377ad7946b73490b18ecf50dc29 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 18 Dec 2019 04:58:58 -0600 Subject: [PATCH 15/16] net/mlx5: E-switch, Extend eswitch enable to handle num_vfs change Subsequent patch protects eswitch mode changes across sriov and devlink interfaces. It is desirable for eswitch to provide thread safe eswitch enable and disable APIs. Hence, extend eswitch enable API to optionally update num_vfs when requested. In subsequent patch, eswitch num_vfs are updated after all the eswitch users eswitch drops its reference count. Reviewed-by: Roi Dayan Reviewed-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 63 +++++++++++++------ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 10 ++- .../mellanox/mlx5/core/eswitch_offloads.c | 13 ++-- .../net/ethernet/mellanox/mlx5/core/sriov.c | 4 +- 4 files changed, 58 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8fc351240f4c..a22f9c77e4c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2067,7 +2067,48 @@ static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) } } -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) +static void +mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (num_vfs < 0) + return; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); +} + +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @mode: Eswitch mode to enable + * @num_vfs: Enable eswitch for given number of VFs. This is optional. + * Valid value are 0, > 0 and MLX5_ESWITCH_IGNORE_NUM_VFS. + * Caller should pass num_vfs > 0 when enabling eswitch for + * vf vports. Caller should pass num_vfs = 0, when eswitch + * is enabled without sriov VFs or when caller + * is unaware of the sriov state of the host PF on ECPF based + * eswitch. Caller should pass < 0 when num_vfs should be + * completely ignored. This is typically the case when eswitch + * is enabled without sriov regardless of PF/ECPF system. + * mlx5_eswitch_enable() Enables eswitch in either legacy or offloads mode. + * If num_vfs >=0 is provided, it setup VF related eswitch vports. It returns + * 0 on success or error code on failure. + */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) { int err; @@ -2085,6 +2126,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) mlx5_eswitch_get_devlink_param(esw); + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + esw_create_tsar(esw); esw->mode = mode; @@ -2811,22 +2854,4 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); } -void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) -{ - const u32 *out; - WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); - - if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { - esw->esw_funcs.num_vfs = num_vfs; - return; - } - - out = mlx5_esw_query_functions(esw->dev); - if (IS_ERR(out)) - return; - - esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, - host_params_context.host_num_of_vfs); - kvfree(out); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 95532b258c2b..752d399bdffb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -296,7 +296,9 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); + +#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs); void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); @@ -635,7 +637,6 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); int @@ -673,7 +674,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } @@ -682,14 +683,11 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} - static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { return ERR_PTR(-EOPNOTSUPP); } - #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53fcb00ddbac..84a38f0739d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1593,12 +1593,13 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, } mlx5_eswitch_disable(esw, false); - mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS, + esw->dev->priv.sriov.num_vfs); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -2397,10 +2398,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, int err, err1; mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 03f037811f1d..10a64b91d04c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -77,8 +77,8 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); - err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); + err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY, + num_vfs); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); From 8e0aa4bc959c98c14ed0aaee522d77ca52690189 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 18 Dec 2019 02:51:19 -0600 Subject: [PATCH 16/16] net/mlx5: E-switch, Protect eswitch mode changes Currently eswitch mode change is occurring from 2 different execution contexts as below. 1. sriov sysfs enable/disable 2. devlink eswitch set commands Both of them need to access eswitch related data structures in synchronized manner. Without any synchronization below race condition exist. SR-IOV enable/disable with devlink eswitch mode change: cpu-0 cpu-1 ----- ----- mlx5_device_disable_sriov() mlx5_devlink_eswitch_mode_set() mlx5_eswitch_disable() esw_offloads_stop() esw_offloads_disable() mlx5_eswitch_disable() esw_offloads_disable() Hence, they are synchronized using a new mode_lock. eswitch's state_lock is not used as it can lead to a deadlock scenario below and state_lock is only for vport and fdb exclusive access. ip link set vf netlink rcv_msg() - Lock A rtnl_lock vfinfo() esw->state_lock() - Lock B devlink eswitch_set devlink_mutex esw->state_lock() - Lock B attach_netdev() register_netdev() rtnl_lock - Lock A Alternatives considered: 1. Acquiring rtnl lock before taking esw->state_lock to follow similar locking sequence as ip link flow during eswitch mode set. rtnl lock is not good idea for two reasons. (a) Holding rtnl lock for several hundred device commands is not good idea. (b) It leads to below and more similar deadlocks. devlink eswitch_set devlink_mutex rtnl_lock - Lock A esw->state_lock() - Lock B eswitch_disable() reload() ib_register_device() ib_cache_setup_one() rtnl_lock() 2. Exporting devlink lock may lead to undesired use of it in vendor driver(s) in future. 3. Unloading representors outside of the mode_lock requires serialization with other process trying to enable the eswitch. 4. Differing the representors life cycle to a different workqueue requires synchronization with func_change_handler workqueue. Reviewed-by: Roi Dayan Reviewed-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 54 +++++++-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 11 +- .../mellanox/mlx5/core/eswitch_offloads.c | 105 ++++++++++++------ .../net/ethernet/mellanox/mlx5/core/sriov.c | 3 +- 4 files changed, 123 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a22f9c77e4c3..7f618a443bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2092,7 +2092,7 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) } /** - * mlx5_eswitch_enable - Enable eswitch + * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch * @mode: Eswitch mode to enable * @num_vfs: Enable eswitch for given number of VFs. This is optional. @@ -2104,16 +2104,17 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) * eswitch. Caller should pass < 0 when num_vfs should be * completely ignored. This is typically the case when eswitch * is enabled without sriov regardless of PF/ECPF system. - * mlx5_eswitch_enable() Enables eswitch in either legacy or offloads mode. - * If num_vfs >=0 is provided, it setup VF related eswitch vports. It returns - * 0 on success or error code on failure. + * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads + * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. + * It returns 0 on success or error code on failure. */ -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) { int err; - if (!ESW_ALLOWED(esw) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + lockdep_assert_held(&esw->mode_lock); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } @@ -2164,11 +2165,34 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) return err; } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch swich for given number of VFs. + * Caller must pass num_vfs > 0 when enabling eswitch for + * vf vports. + * mlx5_eswitch_enable() returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +{ + int ret; + + if (!ESW_ALLOWED(esw)) + return 0; + + mutex_lock(&esw->mode_lock); + ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); + mutex_unlock(&esw->mode_lock); + return ret; +} + +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) { int old_mode; - if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) + lockdep_assert_held_write(&esw->mode_lock); + + if (esw->mode == MLX5_ESWITCH_NONE) return; esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -2197,6 +2221,16 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); } +void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +{ + if (!ESW_ALLOWED(esw)) + return; + + mutex_lock(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw, clear_vf); + mutex_unlock(&esw->mode_lock); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -2248,6 +2282,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr.hlist); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); + mutex_init(&esw->mode_lock); mlx5_esw_for_all_vports(esw, i, vport) { vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); @@ -2282,6 +2317,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 752d399bdffb..39f42f985fbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -258,6 +258,11 @@ struct mlx5_eswitch { */ struct mutex state_lock; + /* Protects eswitch mode change that occurs via one or more + * user commands, i.e. sriov state change, devlink commands. + */ + struct mutex mode_lock; + struct { bool enabled; u32 root_tsar_id; @@ -298,7 +303,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); @@ -674,7 +681,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) { return 0; } +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 84a38f0739d0..612bc7d1cdcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1592,14 +1592,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS, - esw->dev->priv.sriov.num_vfs); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + esw->dev->priv.sriov.num_vfs); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY, - MLX5_ESWITCH_IGNORE_NUM_VFS); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -2397,13 +2397,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY, - MLX5_ESWITCH_IGNORE_NUM_VFS); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS, - MLX5_ESWITCH_IGNORE_NUM_VFS); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -2525,6 +2525,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; u16 cur_mlx5_mode, mlx5_mode = 0; int err; @@ -2532,40 +2533,50 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (err) return err; - err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); - if (err) - return err; - - cur_mlx5_mode = dev->priv.eswitch->mode; - if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + + cur_mlx5_mode = esw->mode; + if (cur_mlx5_mode == mlx5_mode) - return 0; + goto unlock; if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) - return esw_offloads_start(dev->priv.eswitch, extack); + err = esw_offloads_start(esw, extack); else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) - return esw_offloads_stop(dev->priv.eswitch, extack); + err = esw_offloads_stop(esw, extack); else - return -EINVAL; + err = -EINVAL; + +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; int err; err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); if (err) - return err; + goto unlock; - return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); + err = esw_mode_to_devlink(esw->mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, @@ -2580,18 +2591,20 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto out; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) - return 0; + goto out; /* fall through */ case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: break; } @@ -2599,7 +2612,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } err = esw_inline_mode_from_devlink(mode, &mlx5_mode); @@ -2616,6 +2630,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, } esw->offloads.inline_mode = mlx5_mode; + mutex_unlock(&esw->mode_lock); return 0; revert_inline_mode: @@ -2625,6 +2640,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, vport, esw->offloads.inline_mode); out: + mutex_unlock(&esw->mode_lock); return err; } @@ -2638,11 +2654,15 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto unlock; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); + err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, @@ -2657,30 +2677,36 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto unlock; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) - return -EOPNOTSUPP; + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) { + err = -EOPNOTSUPP; + goto unlock; + } - if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) - return -EOPNOTSUPP; + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) { + err = -EOPNOTSUPP; + goto unlock; + } if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; - return 0; + goto unlock; } if (esw->offloads.encap == encap) - return 0; + goto unlock; if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto unlock; } esw_destroy_offloads_fdb_tables(esw); @@ -2696,6 +2722,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw, esw->nvports); } +unlock: + mutex_unlock(&esw->mode_lock); return err; } @@ -2710,11 +2738,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto unlock; *encap = esw->offloads.encap; +unlock: + mutex_unlock(&esw->mode_lock); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 10a64b91d04c..3094d20297a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -77,8 +77,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY, - num_vfs); + err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err);