From 92fe2aa859f52ce6aa595ca97fec110dc7100e63 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:30 -0700 Subject: [PATCH 01/19] libnvdimm: Validate command family indices The ND_CMD_CALL format allows for a general passthrough of passlisted commands targeting a given command set. However there is no validation of the family index relative to what the bus supports. - Update the NFIT bus implementation (the only one that supports ND_CMD_CALL passthrough) to also passlist the valid set of command family indices. - Update the generic __nd_ioctl() path to validate that field on behalf of all implementations. Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism") Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/acpi/nfit/core.c | 11 +++++++++-- drivers/acpi/nfit/nfit.h | 1 - drivers/nvdimm/bus.c | 16 ++++++++++++++++ include/linux/libnvdimm.h | 2 ++ include/uapi/linux/ndctl.h | 4 ++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 7c138a4edc03..1f72ce1a782b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1823,6 +1823,7 @@ static void populate_shutdown_status(struct nfit_mem *nfit_mem) static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, u32 device_handle) { + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; unsigned long dsm_mask, label_mask; @@ -1834,6 +1835,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, /* nfit test assumes 1:1 relationship between commands and dsms */ nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; nfit_mem->family = NVDIMM_FAMILY_INTEL; + set_bit(NVDIMM_FAMILY_INTEL, &nd_desc->dimm_family_mask); if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID) sprintf(nfit_mem->id, "%04x-%02x-%04x-%08x", @@ -1886,10 +1888,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, * Note, that checking for function0 (bit0) tells us if any commands * are reachable through this GUID. */ + clear_bit(NVDIMM_FAMILY_INTEL, &nd_desc->dimm_family_mask); for (i = 0; i <= NVDIMM_FAMILY_MAX; i++) - if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) + if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) { + set_bit(i, &nd_desc->dimm_family_mask); if (family < 0 || i == default_dsm_family) family = i; + } /* limit the supported commands to those that are publicly documented */ nfit_mem->family = family; @@ -2153,6 +2158,9 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; nd_desc->bus_dsm_mask = acpi_desc->bus_nfit_cmd_force_en; + set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); + set_bit(NVDIMM_BUS_FAMILY_NFIT, &nd_desc->bus_family_mask); + adev = to_acpi_dev(acpi_desc); if (!adev) return; @@ -2160,7 +2168,6 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) set_bit(i, &nd_desc->cmd_mask); - set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); dsm_mask = (1 << ND_CMD_ARS_CAP) | diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index f5525f8bb770..5c5e7ebba8dc 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -33,7 +33,6 @@ | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ | ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED) -#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV #define NVDIMM_CMD_MAX 31 #define NVDIMM_STANDARD_CMDMASK \ diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 09087c38fabd..955265656b96 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -1037,9 +1037,25 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, dimm_name = "bus"; } + /* Validate command family support against bus declared support */ if (cmd == ND_CMD_CALL) { + unsigned long *mask; + if (copy_from_user(&pkg, p, sizeof(pkg))) return -EFAULT; + + if (nvdimm) { + if (pkg.nd_family > NVDIMM_FAMILY_MAX) + return -EINVAL; + mask = &nd_desc->dimm_family_mask; + } else { + if (pkg.nd_family > NVDIMM_BUS_FAMILY_MAX) + return -EINVAL; + mask = &nd_desc->bus_family_mask; + } + + if (!test_bit(pkg.nd_family, mask)) + return -EINVAL; } if (!desc || diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 18da4059be09..bd39a2cf7972 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -78,6 +78,8 @@ struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long bus_dsm_mask; unsigned long cmd_mask; + unsigned long dimm_family_mask; + unsigned long bus_family_mask; struct module *module; char *provider_name; struct device_node *of_node; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 0e09dc5cec19..e9468b9332bd 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -245,6 +245,10 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_MSFT 3 #define NVDIMM_FAMILY_HYPERV 4 #define NVDIMM_FAMILY_PAPR 5 +#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_PAPR + +#define NVDIMM_BUS_FAMILY_NFIT 0 +#define NVDIMM_BUS_FAMILY_MAX NVDIMM_BUS_FAMILY_NFIT #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) From d46e6a2176f8edf7030db34aeb54a4f016fabe0a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:35 -0700 Subject: [PATCH 02/19] ACPI: NFIT: Move bus_dsm_mask out of generic nvdimm_bus_descriptor DSMs are strictly an ACPI mechanism, evict the bus_dsm_mask concept from the generic 'struct nvdimm_bus_descriptor' object. As a side effect the test facility ->bus_nfit_cmd_force_en is no longer necessary. The test infrastructure can communicate that information directly in ->bus_dsm_mask. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/acpi/nfit/core.c | 8 ++++---- drivers/acpi/nfit/nfit.h | 2 +- include/linux/libnvdimm.h | 1 - tools/testing/nvdimm/test/nfit.c | 16 ++++++++-------- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 1f72ce1a782b..9fdd655bdf0e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -478,7 +478,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; - dsm_mask = nd_desc->bus_dsm_mask; + dsm_mask = acpi_desc->bus_dsm_mask; desc = nd_cmd_bus_desc(cmd); guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -1238,8 +1238,9 @@ static ssize_t bus_dsm_mask_show(struct device *dev, { struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); - return sprintf(buf, "%#lx\n", nd_desc->bus_dsm_mask); + return sprintf(buf, "%#lx\n", acpi_desc->bus_dsm_mask); } static struct device_attribute dev_attr_bus_dsm_mask = __ATTR(dsm_mask, 0444, bus_dsm_mask_show, NULL); @@ -2157,7 +2158,6 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) int i; nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; - nd_desc->bus_dsm_mask = acpi_desc->bus_nfit_cmd_force_en; set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); set_bit(NVDIMM_BUS_FAMILY_NFIT, &nd_desc->bus_family_mask); @@ -2180,7 +2180,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) (1 << NFIT_CMD_ARS_INJECT_GET); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) - set_bit(i, &nd_desc->bus_dsm_mask); + set_bit(i, &acpi_desc->bus_dsm_mask); } static ssize_t range_index_show(struct device *dev, diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 5c5e7ebba8dc..da097149d94d 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -237,7 +237,7 @@ struct acpi_nfit_desc { unsigned long scrub_flags; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; - unsigned long bus_nfit_cmd_force_en; + unsigned long bus_dsm_mask; unsigned int platform_cap; unsigned int scrub_tmo; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index bd39a2cf7972..ad9898ece7d3 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -76,7 +76,6 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct device_node; struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; - unsigned long bus_dsm_mask; unsigned long cmd_mask; unsigned long dimm_family_mask; unsigned long bus_family_mask; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a8ee5c4d41eb..a59174ba1d2a 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -2507,10 +2507,10 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); - set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask); + set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask); set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); @@ -2731,11 +2731,11 @@ static int nfit_ctl_test(struct device *dev) .module = THIS_MODULE, .provider_name = "ACPI.NFIT", .ndctl = acpi_nfit_ctl, - .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA - | 1UL << NFIT_CMD_ARS_INJECT_SET - | 1UL << NFIT_CMD_ARS_INJECT_CLEAR - | 1UL << NFIT_CMD_ARS_INJECT_GET, }, + .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA + | 1UL << NFIT_CMD_ARS_INJECT_SET + | 1UL << NFIT_CMD_ARS_INJECT_CLEAR + | 1UL << NFIT_CMD_ARS_INJECT_GET, .dev = &adev->dev, }; From 6450ddbd5d8e83ea9927c7f9076a21f829699e0f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:40 -0700 Subject: [PATCH 03/19] ACPI: NFIT: Define runtime firmware activation commands Platform reboots are expensive. Towards reducing downtime to apply firmware updates the Intel NVDIMM command definition is growing support for applying live firmware updates that only require temporarily suspending memory traffic instead of a full reboot. Follow-on commits add support for triggering firmware activation, this patch only defines the commands, adds probe support, and validates that they are blocked via the ioctl path. The ioctl-path block ensures that the OS is in charge since these commands have side effects only the OS can handle. Specifically firmware activation may cause the memory controller to be quiesced on the order of 100s of milliseconds. In that case Linux ensure the activation only takes place while the OS is in a suspend state. Link: https://pmem.io/documents/IntelOptanePMem_DSM_Interface-V2.0.pdf Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Cc: "Rafael J. Wysocki" Cc: Len Brown Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/acpi/nfit/core.c | 86 +++++++++++++++++++++++++------------- drivers/acpi/nfit/intel.h | 53 +++++++++++++++++++++++ drivers/acpi/nfit/nfit.h | 25 ++++++++++- include/uapi/linux/ndctl.h | 3 +- 4 files changed, 137 insertions(+), 30 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 9fdd655bdf0e..78cc9e2d2aa3 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -73,6 +73,18 @@ const guid_t *to_nfit_uuid(enum nfit_uuids id) } EXPORT_SYMBOL(to_nfit_uuid); +static const guid_t *to_nfit_bus_uuid(int family) +{ + if (WARN_ONCE(family == NVDIMM_BUS_FAMILY_NFIT, + "only secondary bus families can be translated\n")) + return NULL; + /* + * The index of bus UUIDs starts immediately following the last + * NVDIMM/leaf family. + */ + return to_nfit_uuid(family + NVDIMM_FAMILY_MAX); +} + static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; @@ -362,24 +374,8 @@ static u8 nfit_dsm_revid(unsigned family, unsigned func) { static const u8 revid_table[NVDIMM_FAMILY_MAX+1][NVDIMM_CMD_MAX+1] = { [NVDIMM_FAMILY_INTEL] = { - [NVDIMM_INTEL_GET_MODES] = 2, - [NVDIMM_INTEL_GET_FWINFO] = 2, - [NVDIMM_INTEL_START_FWUPDATE] = 2, - [NVDIMM_INTEL_SEND_FWUPDATE] = 2, - [NVDIMM_INTEL_FINISH_FWUPDATE] = 2, - [NVDIMM_INTEL_QUERY_FWUPDATE] = 2, - [NVDIMM_INTEL_SET_THRESHOLD] = 2, - [NVDIMM_INTEL_INJECT_ERROR] = 2, - [NVDIMM_INTEL_GET_SECURITY_STATE] = 2, - [NVDIMM_INTEL_SET_PASSPHRASE] = 2, - [NVDIMM_INTEL_DISABLE_PASSPHRASE] = 2, - [NVDIMM_INTEL_UNLOCK_UNIT] = 2, - [NVDIMM_INTEL_FREEZE_LOCK] = 2, - [NVDIMM_INTEL_SECURE_ERASE] = 2, - [NVDIMM_INTEL_OVERWRITE] = 2, - [NVDIMM_INTEL_QUERY_OVERWRITE] = 2, - [NVDIMM_INTEL_SET_MASTER_PASSPHRASE] = 2, - [NVDIMM_INTEL_MASTER_SECURE_ERASE] = 2, + [NVDIMM_INTEL_GET_MODES ... + NVDIMM_INTEL_FW_ACTIVATE_ARM] = 2, }, }; u8 id; @@ -406,7 +402,7 @@ static bool payload_dumpable(struct nvdimm *nvdimm, unsigned int func) } static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, - struct nd_cmd_pkg *call_pkg) + struct nd_cmd_pkg *call_pkg, int *family) { if (call_pkg) { int i; @@ -417,6 +413,7 @@ static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) if (call_pkg->nd_reserved2[i]) return -EINVAL; + *family = call_pkg->nd_family; return call_pkg->nd_command; } @@ -450,13 +447,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, acpi_handle handle; const guid_t *guid; int func, rc, i; + int family = 0; if (cmd_rc) *cmd_rc = -EINVAL; if (cmd == ND_CMD_CALL) call_pkg = buf; - func = cmd_to_func(nfit_mem, cmd, call_pkg); + func = cmd_to_func(nfit_mem, cmd, call_pkg, &family); if (func < 0) return func; @@ -478,9 +476,17 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; - dsm_mask = acpi_desc->bus_dsm_mask; + if (cmd == ND_CMD_CALL && call_pkg->nd_family) { + family = call_pkg->nd_family; + if (!test_bit(family, &nd_desc->bus_family_mask)) + return -EINVAL; + dsm_mask = acpi_desc->family_dsm_mask[family]; + guid = to_nfit_bus_uuid(family); + } else { + dsm_mask = acpi_desc->bus_dsm_mask; + guid = to_nfit_uuid(NFIT_DEV_BUS); + } desc = nd_cmd_bus_desc(cmd); - guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; dimm_name = "bus"; } @@ -516,8 +522,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, in_buf.buffer.length = call_pkg->nd_size_in; } - dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n", - dimm_name, cmd, func, in_buf.buffer.length); + dev_dbg(dev, "%s cmd: %d: family: %d func: %d input length: %d\n", + dimm_name, cmd, family, func, in_buf.buffer.length); if (payload_dumpable(nvdimm, func)) print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, in_buf.buffer.pointer, @@ -2153,14 +2159,21 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS); + unsigned long dsm_mask, *mask; struct acpi_device *adev; - unsigned long dsm_mask; int i; - nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); set_bit(NVDIMM_BUS_FAMILY_NFIT, &nd_desc->bus_family_mask); + /* enable nfit_test to inject bus command emulation */ + if (acpi_desc->bus_cmd_force_en) { + nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; + mask = &nd_desc->bus_family_mask; + if (acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]) + set_bit(NVDIMM_BUS_FAMILY_INTEL, mask); + } + adev = to_acpi_dev(acpi_desc); if (!adev) return; @@ -2181,6 +2194,14 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) set_bit(i, &acpi_desc->bus_dsm_mask); + + /* Enumerate allowed NVDIMM_BUS_FAMILY_INTEL commands */ + dsm_mask = NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK; + guid = to_nfit_bus_uuid(NVDIMM_BUS_FAMILY_INTEL); + mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]; + for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) + if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) + set_bit(i, mask); } static ssize_t range_index_show(struct device *dev, @@ -3492,7 +3513,10 @@ static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, return 0; } -/* prevent security commands from being issued via ioctl */ +/* + * Prevent security and firmware activate commands from being issued via + * ioctl. + */ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf) { @@ -3503,10 +3527,15 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, call_pkg->nd_family == NVDIMM_FAMILY_INTEL) { func = call_pkg->nd_command; if (func > NVDIMM_CMD_MAX || - (1 << func) & NVDIMM_INTEL_SECURITY_CMDMASK) + (1 << func) & NVDIMM_INTEL_DENY_CMDMASK) return -EOPNOTSUPP; } + /* block all non-nfit bus commands */ + if (!nvdimm && cmd == ND_CMD_CALL && + call_pkg->nd_family != NVDIMM_BUS_FAMILY_NFIT) + return -EOPNOTSUPP; + return __acpi_nfit_clear_to_send(nd_desc, nvdimm, cmd); } @@ -3798,6 +3827,7 @@ static __init int nfit_init(void) guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]); guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]); + guid_parse(UUID_INTEL_BUS, &nfit_uuid[NFIT_BUS_INTEL]); nfit_wq = create_singlethread_workqueue("nfit"); if (!nfit_wq) diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h index 0aca682ab9d7..868d073731cc 100644 --- a/drivers/acpi/nfit/intel.h +++ b/drivers/acpi/nfit/intel.h @@ -111,4 +111,57 @@ struct nd_intel_master_secure_erase { u8 passphrase[ND_INTEL_PASSPHRASE_SIZE]; u32 status; } __packed; + +#define ND_INTEL_FWA_IDLE 0 +#define ND_INTEL_FWA_ARMED 1 +#define ND_INTEL_FWA_BUSY 2 + +#define ND_INTEL_DIMM_FWA_NONE 0 +#define ND_INTEL_DIMM_FWA_NOTSTAGED 1 +#define ND_INTEL_DIMM_FWA_SUCCESS 2 +#define ND_INTEL_DIMM_FWA_NEEDRESET 3 +#define ND_INTEL_DIMM_FWA_MEDIAFAILED 4 +#define ND_INTEL_DIMM_FWA_ABORT 5 +#define ND_INTEL_DIMM_FWA_NOTSUPP 6 +#define ND_INTEL_DIMM_FWA_ERROR 7 + +struct nd_intel_fw_activate_dimminfo { + u32 status; + u16 result; + u8 state; + u8 reserved[7]; +} __packed; + +struct nd_intel_fw_activate_arm { + u8 activate_arm; + u32 status; +} __packed; + +/* Root device command payloads */ +#define ND_INTEL_BUS_FWA_CAP_FWQUIESCE (1 << 0) +#define ND_INTEL_BUS_FWA_CAP_OSQUIESCE (1 << 1) +#define ND_INTEL_BUS_FWA_CAP_RESET (1 << 2) + +struct nd_intel_bus_fw_activate_businfo { + u32 status; + u16 reserved; + u8 state; + u8 capability; + u64 activate_tmo; + u64 cpu_quiesce_tmo; + u64 io_quiesce_tmo; + u64 max_quiesce_tmo; +} __packed; + +#define ND_INTEL_BUS_FWA_STATUS_NOARM (6 | 1 << 16) +#define ND_INTEL_BUS_FWA_STATUS_BUSY (6 | 2 << 16) +#define ND_INTEL_BUS_FWA_STATUS_NOFW (6 | 3 << 16) +#define ND_INTEL_BUS_FWA_STATUS_TMO (6 | 4 << 16) +#define ND_INTEL_BUS_FWA_STATUS_NOIDLE (6 | 5 << 16) +#define ND_INTEL_BUS_FWA_STATUS_ABORT (6 | 6 << 16) + +struct nd_intel_bus_fw_activate { + u8 iodev_state; + u32 status; +} __packed; #endif diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index da097149d94d..97c122628975 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -18,6 +18,7 @@ /* http://pmem.io/documents/NVDIMM_DSM_Interface-V1.6.pdf */ #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" +#define UUID_INTEL_BUS "c7d8acd4-2df8-4b82-9f65-a325335af149" /* https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */ #define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6" @@ -65,6 +66,13 @@ enum nvdimm_family_cmds { NVDIMM_INTEL_QUERY_OVERWRITE = 26, NVDIMM_INTEL_SET_MASTER_PASSPHRASE = 27, NVDIMM_INTEL_MASTER_SECURE_ERASE = 28, + NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO = 29, + NVDIMM_INTEL_FW_ACTIVATE_ARM = 30, +}; + +enum nvdimm_bus_family_cmds { + NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO = 1, + NVDIMM_BUS_INTEL_FW_ACTIVATE = 2, }; #define NVDIMM_INTEL_SECURITY_CMDMASK \ @@ -75,13 +83,22 @@ enum nvdimm_family_cmds { | 1 << NVDIMM_INTEL_SET_MASTER_PASSPHRASE \ | 1 << NVDIMM_INTEL_MASTER_SECURE_ERASE) +#define NVDIMM_INTEL_FW_ACTIVATE_CMDMASK \ +(1 << NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO | 1 << NVDIMM_INTEL_FW_ACTIVATE_ARM) + +#define NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK \ +(1 << NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO | 1 << NVDIMM_BUS_INTEL_FW_ACTIVATE) + #define NVDIMM_INTEL_CMDMASK \ (NVDIMM_STANDARD_CMDMASK | 1 << NVDIMM_INTEL_GET_MODES \ | 1 << NVDIMM_INTEL_GET_FWINFO | 1 << NVDIMM_INTEL_START_FWUPDATE \ | 1 << NVDIMM_INTEL_SEND_FWUPDATE | 1 << NVDIMM_INTEL_FINISH_FWUPDATE \ | 1 << NVDIMM_INTEL_QUERY_FWUPDATE | 1 << NVDIMM_INTEL_SET_THRESHOLD \ | 1 << NVDIMM_INTEL_INJECT_ERROR | 1 << NVDIMM_INTEL_LATCH_SHUTDOWN \ - | NVDIMM_INTEL_SECURITY_CMDMASK) + | NVDIMM_INTEL_SECURITY_CMDMASK | NVDIMM_INTEL_FW_ACTIVATE_CMDMASK) + +#define NVDIMM_INTEL_DENY_CMDMASK \ +(NVDIMM_INTEL_SECURITY_CMDMASK | NVDIMM_INTEL_FW_ACTIVATE_CMDMASK) enum nfit_uuids { /* for simplicity alias the uuid index with the family id */ @@ -90,6 +107,11 @@ enum nfit_uuids { NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2, NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT, NFIT_DEV_DIMM_N_HYPERV = NVDIMM_FAMILY_HYPERV, + /* + * to_nfit_bus_uuid() expects to translate bus uuid family ids + * to a UUID index using NVDIMM_FAMILY_MAX as an offset + */ + NFIT_BUS_INTEL = NVDIMM_FAMILY_MAX + NVDIMM_BUS_FAMILY_INTEL, NFIT_SPA_VOLATILE, NFIT_SPA_PM, NFIT_SPA_DCR, @@ -238,6 +260,7 @@ struct acpi_nfit_desc { unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; unsigned long bus_dsm_mask; + unsigned long family_dsm_mask[NVDIMM_BUS_FAMILY_MAX + 1]; unsigned int platform_cap; unsigned int scrub_tmo; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e9468b9332bd..8cf1e4884fd5 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -248,7 +248,8 @@ struct nd_cmd_pkg { #define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_PAPR #define NVDIMM_BUS_FAMILY_NFIT 0 -#define NVDIMM_BUS_FAMILY_MAX NVDIMM_BUS_FAMILY_NFIT +#define NVDIMM_BUS_FAMILY_INTEL 1 +#define NVDIMM_BUS_FAMILY_MAX NVDIMM_BUS_FAMILY_INTEL #define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ struct nd_cmd_pkg) From 24770658dc03bc568dd217b470cba827aeaed582 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:46 -0700 Subject: [PATCH 04/19] tools/testing/nvdimm: Cleanup dimm index passing The ND_CMD_CALL path only applies to the nfit_test0 emulated DIMMs. Cleanup occurrences of (i - t->dcr_idx) since that offset fixup only applies to cases where nfit_test1 needs a bus-local index. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- tools/testing/nvdimm/test/nfit.c | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a59174ba1d2a..ddf9b3095bfa 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1224,6 +1224,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, i = get_dimm(nfit_mem, func); if (i < 0) return i; + if (i >= NUM_DCR) { + dev_WARN_ONCE(&t->pdev.dev, 1, + "ND_CMD_CALL only valid for nfit_test0\n"); + return -EINVAL; + } switch (func) { case NVDIMM_INTEL_GET_SECURITY_STATE: @@ -1252,11 +1257,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, break; case NVDIMM_INTEL_OVERWRITE: rc = nd_intel_test_cmd_overwrite(t, - buf, buf_len, i - t->dcr_idx); + buf, buf_len, i); break; case NVDIMM_INTEL_QUERY_OVERWRITE: rc = nd_intel_test_cmd_query_overwrite(t, - buf, buf_len, i - t->dcr_idx); + buf, buf_len, i); break; case NVDIMM_INTEL_SET_MASTER_PASSPHRASE: rc = nd_intel_test_cmd_master_set_pass(t, @@ -1272,48 +1277,45 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, break; case ND_INTEL_FW_GET_INFO: rc = nd_intel_test_get_fw_info(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_START_UPDATE: rc = nd_intel_test_start_update(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_SEND_DATA: rc = nd_intel_test_send_data(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_FINISH_UPDATE: rc = nd_intel_test_finish_fw(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_FW_FINISH_QUERY: rc = nd_intel_test_finish_query(t, buf, - buf_len, i - t->dcr_idx); + buf_len, i); break; case ND_INTEL_SMART: rc = nfit_test_cmd_smart(buf, buf_len, - &t->smart[i - t->dcr_idx]); + &t->smart[i]); break; case ND_INTEL_SMART_THRESHOLD: rc = nfit_test_cmd_smart_threshold(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx]); + &t->smart_threshold[i]); break; case ND_INTEL_SMART_SET_THRESHOLD: rc = nfit_test_cmd_smart_set_threshold(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx], - &t->smart[i - t->dcr_idx], + &t->smart_threshold[i], + &t->smart[i], &t->pdev.dev, t->dimm_dev[i]); break; case ND_INTEL_SMART_INJECT: rc = nfit_test_cmd_smart_inject(buf, buf_len, - &t->smart_threshold[i - - t->dcr_idx], - &t->smart[i - t->dcr_idx], + &t->smart_threshold[i], + &t->smart[i], &t->pdev.dev, t->dimm_dev[i]); break; default: From 0d47c4dfe5431abd04951a65845b3d989a704f63 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:51 -0700 Subject: [PATCH 05/19] tools/testing/nvdimm: Add command debug messages Arrange the for nfit_test_ctl() path to dump command payloads similarly to the acpi_nfit_ctl() path. This is useful for comparing the sequence of command events between an emulated ACPI-NFIT platform and a real one. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- tools/testing/nvdimm/test/nfit.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index ddf9b3095bfa..9c6f475befe4 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1192,6 +1192,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) return i; } +static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int len) +{ + struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + unsigned int func = cmd; + unsigned int family = 0; + + if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *pkg = buf; + + len = pkg->nd_size_in; + family = pkg->nd_family; + buf = pkg->nd_payload; + func = pkg->nd_command; + } + dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n", + nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func, + len); + print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 16, 4, + buf, min(len, 256u), true); +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -1205,6 +1228,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, cmd_rc = &__cmd_rc; *cmd_rc = 0; + nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len); + if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); From abfd4d9c828b9e1d5ff38c19eed036f707e4e213 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:07:57 -0700 Subject: [PATCH 06/19] tools/testing/nvdimm: Prepare nfit_ctl_test() for ND_CMD_CALL emulation In preparation for adding a mocked implementation of the firmware-activate bus-info command, rework nfit_ctl_test() to operate on a local command payload wrapped in a 'struct nd_cmd_pkg'. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- tools/testing/nvdimm/test/nfit.c | 83 +++++++++++++++++--------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9c6f475befe4..2b0bfbfc0abb 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -2726,14 +2726,17 @@ static int nfit_ctl_test(struct device *dev) struct acpi_nfit_desc *acpi_desc; const u64 test_val = 0x0123456789abcdefULL; unsigned long mask, cmd_size, offset; - union { - struct nd_cmd_get_config_size cfg_size; - struct nd_cmd_clear_error clear_err; - struct nd_cmd_ars_status ars_stat; - struct nd_cmd_ars_cap ars_cap; - char buf[sizeof(struct nd_cmd_ars_status) - + sizeof(struct nd_ars_record)]; - } cmds; + struct nfit_ctl_test_cmd { + struct nd_cmd_pkg pkg; + union { + struct nd_cmd_get_config_size cfg_size; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_status ars_stat; + struct nd_cmd_ars_cap ars_cap; + char buf[sizeof(struct nd_cmd_ars_status) + + sizeof(struct nd_ars_record)]; + }; + } cmd; adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL); if (!adev) @@ -2793,21 +2796,21 @@ static int nfit_ctl_test(struct device *dev) /* basic checkout of a typical 'get config size' command */ - cmd_size = sizeof(cmds.cfg_size); - cmds.cfg_size = (struct nd_cmd_get_config_size) { + cmd_size = sizeof(cmd.cfg_size); + cmd.cfg_size = (struct nd_cmd_get_config_size) { .status = 0, .config_size = SZ_128K, .max_xfer = SZ_4K, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); - if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0 - || cmds.cfg_size.config_size != SZ_128K - || cmds.cfg_size.max_xfer != SZ_4K) { + if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0 + || cmd.cfg_size.config_size != SZ_128K + || cmd.cfg_size.max_xfer != SZ_4K) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", __func__, __LINE__, rc, cmd_rc); return -EIO; @@ -2816,14 +2819,14 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with zero output */ cmd_size = offsetof(struct nd_cmd_ars_status, address); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = 0, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2833,16 +2836,16 @@ static int nfit_ctl_test(struct device *dev) /* test ars_cap with benign extended status */ - cmd_size = sizeof(cmds.ars_cap); - cmds.ars_cap = (struct nd_cmd_ars_cap) { + cmd_size = sizeof(cmd.ars_cap); + cmd.ars_cap = (struct nd_cmd_ars_cap) { .status = ND_ARS_PERSISTENT << 16, }; offset = offsetof(struct nd_cmd_ars_cap, status); - rc = setup_result(cmds.buf + offset, cmd_size - offset); + rc = setup_result(cmd.buf + offset, cmd_size - offset); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2852,19 +2855,19 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with 'status' trimmed from 'out_length' */ - cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record); + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = cmd_size - 4, }; - record = &cmds.ars_stat.records[0]; + record = &cmd.ars_stat.records[0]; *record = (struct nd_ars_record) { .length = test_val, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc || record->length != test_val) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2874,19 +2877,19 @@ static int nfit_ctl_test(struct device *dev) /* test ars_status with 'Output (Size)' including 'status' */ - cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record); - cmds.ars_stat = (struct nd_cmd_ars_status) { + cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record); + cmd.ars_stat = (struct nd_cmd_ars_status) { .out_length = cmd_size, }; - record = &cmds.ars_stat.records[0]; + record = &cmd.ars_stat.records[0]; *record = (struct nd_ars_record) { .length = test_val, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc || record->length != test_val) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2896,15 +2899,15 @@ static int nfit_ctl_test(struct device *dev) /* test extended status for get_config_size results in failure */ - cmd_size = sizeof(cmds.cfg_size); - cmds.cfg_size = (struct nd_cmd_get_config_size) { + cmd_size = sizeof(cmd.cfg_size); + cmd.cfg_size = (struct nd_cmd_get_config_size) { .status = 1 << 16, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc >= 0) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", @@ -2913,16 +2916,16 @@ static int nfit_ctl_test(struct device *dev) } /* test clear error */ - cmd_size = sizeof(cmds.clear_err); - cmds.clear_err = (struct nd_cmd_clear_error) { + cmd_size = sizeof(cmd.clear_err); + cmd.clear_err = (struct nd_cmd_clear_error) { .length = 512, .cleared = 512, }; - rc = setup_result(cmds.buf, cmd_size); + rc = setup_result(cmd.buf, cmd_size); if (rc) return rc; rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR, - cmds.buf, cmd_size, &cmd_rc); + cmd.buf, cmd_size, &cmd_rc); if (rc < 0 || cmd_rc) { dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", __func__, __LINE__, rc, cmd_rc); From 916566ae78462636fe4de59b3f59a4a0c8f70205 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:08:02 -0700 Subject: [PATCH 07/19] tools/testing/nvdimm: Emulate firmware activation commands Augment the existing firmware update emulation to track activations and validate proper update vs activate sequencing. The DIMM firmware activate capability has a concept of a maximum amount of time platform firmware will quiesce the system relative to how many DIMMs are being activated in parallel. Simulate that DIMM activation happens serially, 1 second per-DIMM, and limit the max at 3 seconds. The nfit_test0 bus emulates 5 DIMMs so it will take 2 activations to update all DIMMs. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reported-by: Andy Shevchenko Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/acpi/nfit/intel.h | 5 + tools/testing/nvdimm/test/nfit.c | 209 ++++++++++++++++++++++++++++++- 2 files changed, 210 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h index 868d073731cc..49a598623024 100644 --- a/drivers/acpi/nfit/intel.h +++ b/drivers/acpi/nfit/intel.h @@ -132,6 +132,9 @@ struct nd_intel_fw_activate_dimminfo { u8 reserved[7]; } __packed; +#define ND_INTEL_DIMM_FWA_ARM 1 +#define ND_INTEL_DIMM_FWA_DISARM 0 + struct nd_intel_fw_activate_arm { u8 activate_arm; u32 status; @@ -160,6 +163,8 @@ struct nd_intel_bus_fw_activate_businfo { #define ND_INTEL_BUS_FWA_STATUS_NOIDLE (6 | 5 << 16) #define ND_INTEL_BUS_FWA_STATUS_ABORT (6 | 6 << 16) +#define ND_INTEL_BUS_FWA_IODEV_FORCE_IDLE (0) +#define ND_INTEL_BUS_FWA_IODEV_OS_IDLE (1) struct nd_intel_bus_fw_activate { u8 iodev_state; u32 status; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 2b0bfbfc0abb..a1a5dc645b40 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -173,6 +173,9 @@ struct nfit_test_fw { u64 version; u32 size_received; u64 end_time; + bool armed; + bool missed_activate; + unsigned long last_activate; }; struct nfit_test { @@ -345,7 +348,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t, __func__, t, nd_cmd, buf_len, idx); if (fw->state == FW_STATE_UPDATED) { - /* update already done, need cold boot */ + /* update already done, need activation */ nd_cmd->status = 0x20007; return 0; } @@ -430,6 +433,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t, } dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; + fw->missed_activate = false; /* fall through */ case FW_STATE_UPDATED: nd_cmd->status = 0; @@ -1178,6 +1182,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t, return 0; } +static unsigned long last_activate; + +static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t, + struct nd_intel_bus_fw_activate_businfo *nd_cmd, + unsigned int buf_len) +{ + int i, armed = 0; + int state; + u64 tmo; + + for (i = 0; i < NUM_DCR; i++) { + struct nfit_test_fw *fw = &t->fw[i]; + + if (fw->armed) + armed++; + } + + /* + * Emulate 3 second activation max, and 1 second incremental + * quiesce time per dimm requiring multiple activates to get all + * DIMMs updated. + */ + if (armed) + state = ND_INTEL_FWA_ARMED; + else if (!last_activate || time_after(jiffies, last_activate + 3 * HZ)) + state = ND_INTEL_FWA_IDLE; + else + state = ND_INTEL_FWA_BUSY; + + tmo = armed * USEC_PER_SEC; + *nd_cmd = (struct nd_intel_bus_fw_activate_businfo) { + .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE + | ND_INTEL_BUS_FWA_CAP_OSQUIESCE + | ND_INTEL_BUS_FWA_CAP_RESET, + .state = state, + .activate_tmo = tmo, + .cpu_quiesce_tmo = tmo, + .io_quiesce_tmo = tmo, + .max_quiesce_tmo = 3 * USEC_PER_SEC, + }; + + return 0; +} + +static int nvdimm_bus_intel_fw_activate(struct nfit_test *t, + struct nd_intel_bus_fw_activate *nd_cmd, + unsigned int buf_len) +{ + struct nd_intel_bus_fw_activate_businfo info; + u32 status = 0; + int i; + + nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info)); + if (info.state == ND_INTEL_FWA_BUSY) + status = ND_INTEL_BUS_FWA_STATUS_BUSY; + else if (info.activate_tmo > info.max_quiesce_tmo) + status = ND_INTEL_BUS_FWA_STATUS_TMO; + else if (info.state == ND_INTEL_FWA_IDLE) + status = ND_INTEL_BUS_FWA_STATUS_NOARM; + + dev_dbg(&t->pdev.dev, "status: %d\n", status); + nd_cmd->status = status; + if (status && status != ND_INTEL_BUS_FWA_STATUS_TMO) + return 0; + + last_activate = jiffies; + for (i = 0; i < NUM_DCR; i++) { + struct nfit_test_fw *fw = &t->fw[i]; + + if (!fw->armed) + continue; + if (fw->state != FW_STATE_UPDATED) + fw->missed_activate = true; + else + fw->state = FW_STATE_NEW; + fw->armed = false; + fw->last_activate = last_activate; + } + + return 0; +} + +static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t, + struct nd_intel_fw_activate_dimminfo *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct nd_intel_bus_fw_activate_businfo info; + struct nfit_test_fw *fw = &t->fw[dimm]; + u32 result, state; + + nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info)); + + if (info.state == ND_INTEL_FWA_BUSY) + state = ND_INTEL_FWA_BUSY; + else if (info.state == ND_INTEL_FWA_IDLE) + state = ND_INTEL_FWA_IDLE; + else if (fw->armed) + state = ND_INTEL_FWA_ARMED; + else + state = ND_INTEL_FWA_IDLE; + + result = ND_INTEL_DIMM_FWA_NONE; + if (last_activate && fw->last_activate == last_activate && + state == ND_INTEL_FWA_IDLE) { + if (fw->missed_activate) + result = ND_INTEL_DIMM_FWA_NOTSTAGED; + else + result = ND_INTEL_DIMM_FWA_SUCCESS; + } + + *nd_cmd = (struct nd_intel_fw_activate_dimminfo) { + .result = result, + .state = state, + }; + + return 0; +} + +static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t, + struct nd_intel_fw_activate_arm *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct nfit_test_fw *fw = &t->fw[dimm]; + + fw->armed = nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM; + nd_cmd->status = 0; + return 0; +} static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) { @@ -1296,6 +1428,14 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nd_intel_test_cmd_master_secure_erase(t, buf, buf_len, i); break; + case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO: + rc = nd_intel_test_cmd_fw_activate_dimminfo( + t, buf, buf_len, i); + break; + case NVDIMM_INTEL_FW_ACTIVATE_ARM: + rc = nd_intel_test_cmd_fw_activate_arm( + t, buf, buf_len, i); + break; case ND_INTEL_ENABLE_LSS_STATUS: rc = nd_intel_test_cmd_set_lss_status(t, buf, buf_len); @@ -1380,9 +1520,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!nd_desc) return -ENOTTY; - if (cmd == ND_CMD_CALL) { + if (cmd == ND_CMD_CALL && call_pkg->nd_family + == NVDIMM_BUS_FAMILY_NFIT) { func = call_pkg->nd_command; - buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; buf = (void *) call_pkg->nd_payload; @@ -1406,7 +1546,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, default: return -ENOTTY; } - } + } else if (cmd == ND_CMD_CALL && call_pkg->nd_family + == NVDIMM_BUS_FAMILY_INTEL) { + func = call_pkg->nd_command; + buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; + buf = (void *) call_pkg->nd_payload; + + switch (func) { + case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO: + rc = nvdimm_bus_intel_fw_activate_businfo(t, + buf, buf_len); + return rc; + case NVDIMM_BUS_INTEL_FW_ACTIVATE: + rc = nvdimm_bus_intel_fw_activate(t, buf, + buf_len); + return rc; + default: + return -ENOTTY; + } + } else if (cmd == ND_CMD_CALL) + return -ENOTTY; if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; @@ -1832,6 +1991,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_flush_address *flush; struct acpi_nfit_capabilities *pcap; unsigned int offset = 0, i; + unsigned long *acpi_mask; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -2558,6 +2718,12 @@ static void nfit_test0_setup(struct nfit_test *t) &acpi_desc->dimm_cmd_force_en); set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en); + + acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]; + set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask); + set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask); } static void nfit_test1_setup(struct nfit_test *t) @@ -2733,6 +2899,7 @@ static int nfit_ctl_test(struct device *dev) struct nd_cmd_clear_error clear_err; struct nd_cmd_ars_status ars_stat; struct nd_cmd_ars_cap ars_cap; + struct nd_intel_bus_fw_activate_businfo fwa_info; char buf[sizeof(struct nd_cmd_ars_status) + sizeof(struct nd_ars_record)]; }; @@ -2761,11 +2928,15 @@ static int nfit_ctl_test(struct device *dev) .module = THIS_MODULE, .provider_name = "ACPI.NFIT", .ndctl = acpi_nfit_ctl, + .bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT + | 1UL << NVDIMM_BUS_FAMILY_INTEL, }, .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA | 1UL << NFIT_CMD_ARS_INJECT_SET | 1UL << NFIT_CMD_ARS_INJECT_CLEAR | 1UL << NFIT_CMD_ARS_INJECT_GET, + .family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] = + NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK, .dev = &adev->dev, }; @@ -2932,6 +3103,36 @@ static int nfit_ctl_test(struct device *dev) return -EIO; } + /* test firmware activate bus info */ + cmd_size = sizeof(cmd.fwa_info); + cmd = (struct nfit_ctl_test_cmd) { + .pkg = { + .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, + .nd_family = NVDIMM_BUS_FAMILY_INTEL, + .nd_size_out = cmd_size, + .nd_fw_size = cmd_size, + }, + .fwa_info = { + .state = ND_INTEL_FWA_IDLE, + .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE + | ND_INTEL_BUS_FWA_CAP_OSQUIESCE, + .activate_tmo = 1, + .cpu_quiesce_tmo = 1, + .io_quiesce_tmo = 1, + .max_quiesce_tmo = 1, + }, + }; + rc = setup_result(cmd.buf, cmd_size); + if (rc) + return rc; + rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL, + &cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc); + if (rc < 0 || cmd_rc) { + dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n", + __func__, __LINE__, rc, cmd_rc); + return -EIO; + } + return 0; } From 60d360acddc54344409a710af07c561e025f13f5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:08:07 -0700 Subject: [PATCH 08/19] driver-core: Introduce DEVICE_ATTR_ADMIN_{RO,RW} A common pattern for using plain DEVICE_ATTR() instead of DEVICE_ATTR_RO() and DEVICE_ATTR_RW() is for attributes that want to limit read to only root. I.e. many users of DEVICE_ATTR() are specifying 0400 or 0600 for permissions. Given the expectation that CAP_SYS_ADMIN is needed to access these sensitive attributes add an explicit helper with the _ADMIN_ identifier for DEVICE_ATTR_ADMIN_{RO,RW}. Cc: "Rafael J. Wysocki" Reviewed-by: Greg Kroah-Hartman Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- include/linux/device.h | 4 ++++ include/linux/sysfs.h | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/include/linux/device.h b/include/linux/device.h index 15460a5ac024..d7c2570368fa 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -128,8 +128,12 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, __ATTR_PREALLOC(_name, _mode, _show, _store) #define DEVICE_ATTR_RW(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RW(_name) +#define DEVICE_ATTR_ADMIN_RW(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_RW_MODE(_name, 0600) #define DEVICE_ATTR_RO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RO(_name) +#define DEVICE_ATTR_ADMIN_RO(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_RO_MODE(_name, 0400) #define DEVICE_ATTR_WO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_WO(_name) #define DEVICE_ULONG_ATTR(_name, _mode, _var) \ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 86067dbe7745..34e84122f635 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -123,6 +123,13 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RW_MODE(_name, _mode) { \ + .attr = { .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .show = _name##_show, \ + .store = _name##_store, \ +} + #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = 0200 }, \ .store = _name##_store, \ From 231609785cbfb341e7d6d24a74d6ab8cc518835f Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 26 Jul 2020 00:24:50 +0800 Subject: [PATCH 09/19] dax: print error message by pr_info() in __generic_fsdax_supported() In struct dax_operations, the callback routine dax_supported() returns a bool type result. For false return value, the caller has no idea whether the device does not support dax at all, or it is just some mis- configuration issue. An example is formatting an Ext4 file system on pmem device on top of a NVDIMM namespace by, # mkfs.ext4 /dev/pmem0 If the fs block size does not match kernel space memory page size (which is possible on non-x86 platform), mount this Ext4 file system will fail, # mount -o dax /dev/pmem0 /mnt mount: /mnt: wrong fs type, bad option, bad superblock on /dev/pmem0, missing codepage or helper program, or other error. And from the dmesg output there is only the following information, [ 307.853148] EXT4-fs (pmem0): DAX unsupported by block device. The above information is quite confusing. Because definitely the pmem0 device supports dax operation, and the super block is consistent as how it was created by mkfs.ext4. Indeed the failure is from __generic_fsdax_supported() by the following code piece, if (blocksize != PAGE_SIZE) { pr_debug("%s: error: unsupported blocksize for dax\n", bdevname(bdev, buf)); return false; } It is because the Ext4 block size is 4KB and kernel page size is 8KB or 16KB. It is not simple to make dax_supported() from struct dax_operations or __generic_fsdax_supported() to return exact failure type right now. So the simplest fix is to use pr_info() to print all the error messages inside __generic_fsdax_supported(). Then users may find informative clue from the kernel message at least. Message printed by pr_debug() is very easy to be ignored by users. This patch prints error message by pr_info() in __generic_fsdax_supported(), when then mount fails, following lines can be found from dmesg output, [ 2705.500885] pmem0: error: unsupported blocksize for dax [ 2705.500888] EXT4-fs (pmem0): DAX unsupported by block device. Now the users may have idea the mount failure is from pmem driver for unsupported block size. Link: https://lore.kernel.org/r/20200725162450.95999-1-colyli@suse.de Cc: Dan Williams Cc: Anthony Iliopoulos Reported-by: Michal Suchanek Suggested-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Ira Weiny Reviewed-by: Pankaj Gupta Signed-off-by: Coly Li Signed-off-by: Vishal Verma --- drivers/dax/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 8e32345be0f7..de0d02ec0347 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -80,14 +80,14 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, int err, id; if (blocksize != PAGE_SIZE) { - pr_debug("%s: error: unsupported blocksize for dax\n", + pr_info("%s: error: unsupported blocksize for dax\n", bdevname(bdev, buf)); return false; } err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff); if (err) { - pr_debug("%s: error: unaligned partition for dax\n", + pr_info("%s: error: unaligned partition for dax\n", bdevname(bdev, buf)); return false; } @@ -95,7 +95,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512; err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end); if (err) { - pr_debug("%s: error: unaligned partition for dax\n", + pr_info("%s: error: unaligned partition for dax\n", bdevname(bdev, buf)); return false; } @@ -106,7 +106,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, dax_read_unlock(id); if (len < 1 || len2 < 1) { - pr_debug("%s: error: dax access failed (%ld)\n", + pr_info("%s: error: dax access failed (%ld)\n", bdevname(bdev, buf), len < 1 ? len : len2); return false; } @@ -139,7 +139,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, } if (!dax_enabled) { - pr_debug("%s: error: dax support not enabled\n", + pr_info("%s: error: dax support not enabled\n", bdevname(bdev, buf)); return false; } From c7fe193f1877e80e905a39e494ff53bd20c97970 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 17 Jul 2020 00:20:49 -0700 Subject: [PATCH 10/19] fs/dax: Remove unused size parameter Passing size to copy_user_dax implies it can copy variable sizes of data when in fact it calls copy_user_page() which is exactly a page. We are safe because the only caller uses PAGE_SIZE anyway so just remove the variable for clarity. While we are at it change copy_user_dax() to copy_cow_page_dax() to make it clear it is a singleton helper for this one case not implementing what dax_iomap_actor() does. Link: https://lore.kernel.org/r/20200717072056.73134-11-ira.weiny@intel.com Reviewed-by: Ben Widawsky Reviewed-by: Dan Williams Signed-off-by: Ira Weiny Signed-off-by: Vishal Verma --- fs/dax.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 11b16729b86f..3e0babeb0365 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -680,21 +680,20 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, return __dax_invalidate_entry(mapping, index, false); } -static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, - sector_t sector, size_t size, struct page *to, - unsigned long vaddr) +static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_dev, + sector_t sector, struct page *to, unsigned long vaddr) { void *vto, *kaddr; pgoff_t pgoff; long rc; int id; - rc = bdev_dax_pgoff(bdev, sector, size, &pgoff); + rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff); if (rc) return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL); if (rc < 0) { dax_read_unlock(id); return rc; @@ -1305,8 +1304,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp, clear_user_highpage(vmf->cow_page, vaddr); break; case IOMAP_MAPPED: - error = copy_user_dax(iomap.bdev, iomap.dax_dev, - sector, PAGE_SIZE, vmf->cow_page, vaddr); + error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev, + sector, vmf->cow_page, vaddr); break; default: WARN_ON_ONCE(1); From eedfd73d401b5584ab270b5f9f80079e56c7807e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 17 Jul 2020 00:20:50 -0700 Subject: [PATCH 11/19] drivers/dax: Expand lock scope to cover the use of addresses The addition of PKS protection to dax read lock/unlock will require that the address returned by dax_direct_access() be protected by this lock. Correct the locking by ensuring that the use of kaddr and end_kaddr are covered by the dax read lock/unlock. Link: https://lore.kernel.org/r/20200717072056.73134-12-ira.weiny@intel.com Reviewed-by: Dan Williams Signed-off-by: Ira Weiny Signed-off-by: Vishal Verma --- drivers/dax/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index de0d02ec0347..ec62752215e4 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -103,11 +103,11 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, id = dax_read_lock(); len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); - dax_read_unlock(id); if (len < 1 || len2 < 1) { pr_info("%s: error: dax access failed (%ld)\n", bdevname(bdev, buf), len < 1 ? len : len2); + dax_read_unlock(id); return false; } @@ -137,6 +137,7 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev, put_dev_pagemap(end_pgmap); } + dax_read_unlock(id); if (!dax_enabled) { pr_info("%s: error: dax support not enabled\n", From 5cf81ce1893da01fede18c6749eafd4bc1c5ae9b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:08:13 -0700 Subject: [PATCH 12/19] libnvdimm: Convert to DEVICE_ATTR_ADMIN_RO() Move libnvdimm sysfs attributes that currently use an open coded DEVICE_ATTR() to hide sensitive root-only information (physical memory layout) to the new DEVICE_ATTR_ADMIN_RO() helper. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/nvdimm/namespace_devs.c | 2 +- drivers/nvdimm/pfn_devs.c | 2 +- drivers/nvdimm/region_devs.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index ae155e860fdc..6da67f4d641a 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1309,7 +1309,7 @@ static ssize_t resource_show(struct device *dev, return -ENXIO; return sprintf(buf, "%#llx\n", (unsigned long long) res->start); } -static DEVICE_ATTR(resource, 0400, resource_show, NULL); +static DEVICE_ATTR_ADMIN_RO(resource); static const unsigned long blk_lbasize_supported[] = { 512, 520, 528, 4096, 4104, 4160, 4224, 0 }; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 34db557dbad1..3e11ef8d3f5b 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -218,7 +218,7 @@ static ssize_t resource_show(struct device *dev, return rc; } -static DEVICE_ATTR(resource, 0400, resource_show, NULL); +static DEVICE_ATTR_ADMIN_RO(resource); static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 4502f9c4708d..20ff30c2ab93 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -605,7 +605,7 @@ static ssize_t resource_show(struct device *dev, return sprintf(buf, "%#llx\n", nd_region->ndr_start); } -static DEVICE_ATTR(resource, 0400, resource_show, NULL); +static DEVICE_ATTR_ADMIN_RO(resource); static ssize_t persistence_domain_show(struct device *dev, struct device_attribute *attr, char *buf) From 48001ea50d17f3eb06a552e9ecf21f7fc01b25da Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:08:18 -0700 Subject: [PATCH 13/19] PM, libnvdimm: Add runtime firmware activation support Abstract platform specific mechanics for nvdimm firmware activation behind a handful of generic ops. At the bus level ->activate_state() indicates the unified state (idle, busy, armed) of all DIMMs on the bus, and ->capability() indicates the system state expectations for activate. At the DIMM level ->activate_state() indicates the per-DIMM state, ->activate_result() indicates the outcome of the last activation attempt, and ->arm() attempts to transition the DIMM from 'idle' to 'armed'. A new hibernate_quiet_exec() facility is added to support firmware activation in an OS defined system quiesce state. It leverages the fact that the hibernate-freeze state wants to assert that a memory hibernation snapshot can be taken. This is in contrast to a platform firmware defined quiesce state that may forcefully quiet the memory controller independent of whether an individual device-driver properly supports hibernate-freeze. The libnvdimm sysfs interface is extended to support detection of a firmware activate capability. The mechanism supports enumeration and triggering of firmware activate, optionally in the hibernate_quiet_exec() context. [rafael: hibernate_quiet_exec() proposal] [vishal: fix up sparse warning, grammar in Documentation/] Cc: Pavel Machek Cc: Ira Weiny Cc: Len Brown Cc: Jonathan Corbet Cc: Dave Jiang Cc: Vishal Verma Reported-by: kernel test robot Co-developed-by: "Rafael J. Wysocki" Signed-off-by: "Rafael J. Wysocki" Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- Documentation/ABI/testing/sysfs-bus-nvdimm | 2 + .../driver-api/nvdimm/firmware-activate.rst | 86 ++++++++++ drivers/nvdimm/core.c | 149 ++++++++++++++++++ drivers/nvdimm/dimm_devs.c | 115 ++++++++++++++ drivers/nvdimm/nd-core.h | 1 + include/linux/libnvdimm.h | 44 ++++++ include/linux/suspend.h | 6 + kernel/power/hibernate.c | 97 ++++++++++++ 8 files changed, 500 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-nvdimm create mode 100644 Documentation/driver-api/nvdimm/firmware-activate.rst diff --git a/Documentation/ABI/testing/sysfs-bus-nvdimm b/Documentation/ABI/testing/sysfs-bus-nvdimm new file mode 100644 index 000000000000..d64380262be8 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-nvdimm @@ -0,0 +1,2 @@ +The libnvdimm sub-system implements a common sysfs interface for +platform nvdimm resources. See Documentation/driver-api/nvdimm/. diff --git a/Documentation/driver-api/nvdimm/firmware-activate.rst b/Documentation/driver-api/nvdimm/firmware-activate.rst new file mode 100644 index 000000000000..7ee7decbbdc3 --- /dev/null +++ b/Documentation/driver-api/nvdimm/firmware-activate.rst @@ -0,0 +1,86 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +NVDIMM Runtime Firmware Activation +================================== + +Some persistent memory devices run a firmware locally on the device / +"DIMM" to perform tasks like media management, capacity provisioning, +and health monitoring. The process of updating that firmware typically +involves a reboot because it has implications for in-flight memory +transactions. However, reboots are disruptive and at least the Intel +persistent memory platform implementation, described by the Intel ACPI +DSM specification [1], has added support for activating firmware at +runtime. + +A native sysfs interface is implemented in libnvdimm to allow platform +to advertise and control their local runtime firmware activation +capability. + +The libnvdimm bus object, ndbusX, implements an ndbusX/firmware/activate +attribute that shows the state of the firmware activation as one of 'idle', +'armed', 'overflow', and 'busy'. + +- idle: + No devices are set / armed to activate firmware + +- armed: + At least one device is armed + +- busy: + In the busy state armed devices are in the process of transitioning + back to idle and completing an activation cycle. + +- overflow: + If the platform has a concept of incremental work needed to perform + the activation it could be the case that too many DIMMs are armed for + activation. In that scenario the potential for firmware activation to + timeout is indicated by the 'overflow' state. + +The 'ndbusX/firmware/activate' property can be written with a value of +either 'live', or 'quiesce'. A value of 'quiesce' triggers the kernel to +run firmware activation from within the equivalent of the hibernation +'freeze' state where drivers and applications are notified to stop their +modifications of system memory. A value of 'live' attempts +firmware activation without this hibernation cycle. The +'ndbusX/firmware/activate' property will be elided completely if no +firmware activation capability is detected. + +Another property 'ndbusX/firmware/capability' indicates a value of +'live' or 'quiesce', where 'live' indicates that the firmware +does not require or inflict any quiesce period on the system to update +firmware. A capability value of 'quiesce' indicates that firmware does +expect and injects a quiet period for the memory controller, but 'live' +may still be written to 'ndbusX/firmware/activate' as an override to +assume the risk of racing firmware update with in-flight device and +application activity. The 'ndbusX/firmware/capability' property will be +elided completely if no firmware activation capability is detected. + +The libnvdimm memory-device / DIMM object, nmemX, implements +'nmemX/firmware/activate' and 'nmemX/firmware/result' attributes to +communicate the per-device firmware activation state. Similar to the +'ndbusX/firmware/activate' attribute, the 'nmemX/firmware/activate' +attribute indicates 'idle', 'armed', or 'busy'. The state transitions +from 'armed' to 'idle' when the system is prepared to activate firmware, +firmware staged + state set to armed, and 'ndbusX/firmware/activate' is +triggered. After that activation event the nmemX/firmware/result +attribute reflects the state of the last activation as one of: + +- none: + No runtime activation triggered since the last time the device was reset + +- success: + The last runtime activation completed successfully. + +- fail: + The last runtime activation failed for device-specific reasons. + +- not_staged: + The last runtime activation failed due to a sequencing error of the + firmware image not being staged. + +- need_reset: + Runtime firmware activation failed, but the firmware can still be + activated via the legacy method of power-cycling the system. + +[1]: https://docs.pmem.io/persistent-memory/ diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index fe9bd6febdd2..c21ba0602029 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -4,6 +4,7 @@ */ #include #include +#include #include #include #include @@ -389,8 +390,156 @@ static const struct attribute_group nvdimm_bus_attribute_group = { .attrs = nvdimm_bus_attributes, }; +static ssize_t capability_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + enum nvdimm_fwa_capability cap; + + if (!nd_desc->fw_ops) + return -EOPNOTSUPP; + + nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); + nvdimm_bus_unlock(dev); + + switch (cap) { + case NVDIMM_FWA_CAP_QUIESCE: + return sprintf(buf, "quiesce\n"); + case NVDIMM_FWA_CAP_LIVE: + return sprintf(buf, "live\n"); + default: + return -EOPNOTSUPP; + } +} + +static DEVICE_ATTR_RO(capability); + +static ssize_t activate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + enum nvdimm_fwa_capability cap; + enum nvdimm_fwa_state state; + + if (!nd_desc->fw_ops) + return -EOPNOTSUPP; + + nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); + state = nd_desc->fw_ops->activate_state(nd_desc); + nvdimm_bus_unlock(dev); + + if (cap < NVDIMM_FWA_CAP_QUIESCE) + return -EOPNOTSUPP; + + switch (state) { + case NVDIMM_FWA_IDLE: + return sprintf(buf, "idle\n"); + case NVDIMM_FWA_BUSY: + return sprintf(buf, "busy\n"); + case NVDIMM_FWA_ARMED: + return sprintf(buf, "armed\n"); + case NVDIMM_FWA_ARM_OVERFLOW: + return sprintf(buf, "overflow\n"); + default: + return -ENXIO; + } +} + +static int exec_firmware_activate(void *data) +{ + struct nvdimm_bus_descriptor *nd_desc = data; + + return nd_desc->fw_ops->activate(nd_desc); +} + +static ssize_t activate_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + enum nvdimm_fwa_state state; + bool quiesce; + ssize_t rc; + + if (!nd_desc->fw_ops) + return -EOPNOTSUPP; + + if (sysfs_streq(buf, "live")) + quiesce = false; + else if (sysfs_streq(buf, "quiesce")) + quiesce = true; + else + return -EINVAL; + + nvdimm_bus_lock(dev); + state = nd_desc->fw_ops->activate_state(nd_desc); + + switch (state) { + case NVDIMM_FWA_BUSY: + rc = -EBUSY; + break; + case NVDIMM_FWA_ARMED: + case NVDIMM_FWA_ARM_OVERFLOW: + if (quiesce) + rc = hibernate_quiet_exec(exec_firmware_activate, nd_desc); + else + rc = nd_desc->fw_ops->activate(nd_desc); + break; + case NVDIMM_FWA_IDLE: + default: + rc = -ENXIO; + } + nvdimm_bus_unlock(dev); + + if (rc == 0) + rc = len; + return rc; +} + +static DEVICE_ATTR_ADMIN_RW(activate); + +static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + enum nvdimm_fwa_capability cap; + + /* + * Both 'activate' and 'capability' disappear when no ops + * detected, or a negative capability is indicated. + */ + if (!nd_desc->fw_ops) + return 0; + + nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); + nvdimm_bus_unlock(dev); + + if (cap < NVDIMM_FWA_CAP_QUIESCE) + return 0; + + return a->mode; +} +static struct attribute *nvdimm_bus_firmware_attributes[] = { + &dev_attr_activate.attr, + &dev_attr_capability.attr, + NULL, +}; + +static const struct attribute_group nvdimm_bus_firmware_attribute_group = { + .name = "firmware", + .attrs = nvdimm_bus_firmware_attributes, + .is_visible = nvdimm_bus_firmware_visible, +}; + const struct attribute_group *nvdimm_bus_attribute_groups[] = { &nvdimm_bus_attribute_group, + &nvdimm_bus_firmware_attribute_group, NULL, }; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index b7b77e8d9027..85b53a7f44f2 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -446,9 +446,124 @@ static const struct attribute_group nvdimm_attribute_group = { .is_visible = nvdimm_visible, }; +static ssize_t result_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + enum nvdimm_fwa_result result; + + if (!nvdimm->fw_ops) + return -EOPNOTSUPP; + + nvdimm_bus_lock(dev); + result = nvdimm->fw_ops->activate_result(nvdimm); + nvdimm_bus_unlock(dev); + + switch (result) { + case NVDIMM_FWA_RESULT_NONE: + return sprintf(buf, "none\n"); + case NVDIMM_FWA_RESULT_SUCCESS: + return sprintf(buf, "success\n"); + case NVDIMM_FWA_RESULT_FAIL: + return sprintf(buf, "fail\n"); + case NVDIMM_FWA_RESULT_NOTSTAGED: + return sprintf(buf, "not_staged\n"); + case NVDIMM_FWA_RESULT_NEEDRESET: + return sprintf(buf, "need_reset\n"); + default: + return -ENXIO; + } +} +static DEVICE_ATTR_ADMIN_RO(result); + +static ssize_t activate_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + enum nvdimm_fwa_state state; + + if (!nvdimm->fw_ops) + return -EOPNOTSUPP; + + nvdimm_bus_lock(dev); + state = nvdimm->fw_ops->activate_state(nvdimm); + nvdimm_bus_unlock(dev); + + switch (state) { + case NVDIMM_FWA_IDLE: + return sprintf(buf, "idle\n"); + case NVDIMM_FWA_BUSY: + return sprintf(buf, "busy\n"); + case NVDIMM_FWA_ARMED: + return sprintf(buf, "armed\n"); + default: + return -ENXIO; + } +} + +static ssize_t activate_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + enum nvdimm_fwa_trigger arg; + int rc; + + if (!nvdimm->fw_ops) + return -EOPNOTSUPP; + + if (sysfs_streq(buf, "arm")) + arg = NVDIMM_FWA_ARM; + else if (sysfs_streq(buf, "disarm")) + arg = NVDIMM_FWA_DISARM; + else + return -EINVAL; + + nvdimm_bus_lock(dev); + rc = nvdimm->fw_ops->arm(nvdimm, arg); + nvdimm_bus_unlock(dev); + + if (rc < 0) + return rc; + return len; +} +static DEVICE_ATTR_ADMIN_RW(activate); + +static struct attribute *nvdimm_firmware_attributes[] = { + &dev_attr_activate.attr, + &dev_attr_result.attr, +}; + +static umode_t nvdimm_firmware_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + struct nvdimm *nvdimm = to_nvdimm(dev); + enum nvdimm_fwa_capability cap; + + if (!nd_desc->fw_ops) + return 0; + if (!nvdimm->fw_ops) + return 0; + + nvdimm_bus_lock(dev); + cap = nd_desc->fw_ops->capability(nd_desc); + nvdimm_bus_unlock(dev); + + if (cap < NVDIMM_FWA_CAP_QUIESCE) + return 0; + + return a->mode; +} + +static const struct attribute_group nvdimm_firmware_attribute_group = { + .name = "firmware", + .attrs = nvdimm_firmware_attributes, + .is_visible = nvdimm_firmware_visible, +}; + static const struct attribute_group *nvdimm_attribute_groups[] = { &nd_device_attribute_group, &nvdimm_attribute_group, + &nvdimm_firmware_attribute_group, NULL, }; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index ddb9d97d9129..564faa36a3ca 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -45,6 +45,7 @@ struct nvdimm { struct kernfs_node *overwrite_state; } sec; struct delayed_work dwork; + const struct nvdimm_fw_ops *fw_ops; }; static inline unsigned long nvdimm_security_flags( diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index ad9898ece7d3..15dbcb718316 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -86,6 +86,7 @@ struct nvdimm_bus_descriptor { int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *data); + const struct nvdimm_bus_fw_ops *fw_ops; }; struct nd_cmd_desc { @@ -200,6 +201,49 @@ struct nvdimm_security_ops { int (*query_overwrite)(struct nvdimm *nvdimm); }; +enum nvdimm_fwa_state { + NVDIMM_FWA_INVALID, + NVDIMM_FWA_IDLE, + NVDIMM_FWA_ARMED, + NVDIMM_FWA_BUSY, + NVDIMM_FWA_ARM_OVERFLOW, +}; + +enum nvdimm_fwa_trigger { + NVDIMM_FWA_ARM, + NVDIMM_FWA_DISARM, +}; + +enum nvdimm_fwa_capability { + NVDIMM_FWA_CAP_INVALID, + NVDIMM_FWA_CAP_NONE, + NVDIMM_FWA_CAP_QUIESCE, + NVDIMM_FWA_CAP_LIVE, +}; + +enum nvdimm_fwa_result { + NVDIMM_FWA_RESULT_INVALID, + NVDIMM_FWA_RESULT_NONE, + NVDIMM_FWA_RESULT_SUCCESS, + NVDIMM_FWA_RESULT_NOTSTAGED, + NVDIMM_FWA_RESULT_NEEDRESET, + NVDIMM_FWA_RESULT_FAIL, +}; + +struct nvdimm_bus_fw_ops { + enum nvdimm_fwa_state (*activate_state) + (struct nvdimm_bus_descriptor *nd_desc); + enum nvdimm_fwa_capability (*capability) + (struct nvdimm_bus_descriptor *nd_desc); + int (*activate)(struct nvdimm_bus_descriptor *nd_desc); +}; + +struct nvdimm_fw_ops { + enum nvdimm_fwa_state (*activate_state)(struct nvdimm *nvdimm); + enum nvdimm_fwa_result (*activate_result)(struct nvdimm *nvdimm); + int (*arm)(struct nvdimm *nvdimm, enum nvdimm_fwa_trigger arg); +}; + void badrange_init(struct badrange *badrange); int badrange_add(struct badrange *badrange, u64 addr, u64 length); void badrange_forget(struct badrange *badrange, phys_addr_t start, diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b960098acfb0..cb9afad82a90 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -453,6 +453,8 @@ extern bool hibernation_available(void); asmlinkage int swsusp_save(void); extern struct pbe *restore_pblist; int pfn_is_nosave(unsigned long pfn); + +int hibernate_quiet_exec(int (*func)(void *data), void *data); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} @@ -464,6 +466,10 @@ static inline void hibernation_set_ops(const struct platform_hibernation_ops *op static inline int hibernate(void) { return -ENOSYS; } static inline bool system_entering_hibernation(void) { return false; } static inline bool hibernation_available(void) { return false; } + +static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { + return -ENOTSUPP; +} #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 02ec716a4927..e6fab3f09c98 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -795,6 +795,103 @@ int hibernate(void) return error; } +/** + * hibernate_quiet_exec - Execute a function with all devices frozen. + * @func: Function to execute. + * @data: Data pointer to pass to @func. + * + * Return the @func return value or an error code if it cannot be executed. + */ +int hibernate_quiet_exec(int (*func)(void *data), void *data) +{ + int error, nr_calls = 0; + + lock_system_sleep(); + + if (!hibernate_acquire()) { + error = -EBUSY; + goto unlock; + } + + pm_prepare_console(); + + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; + goto exit; + } + + error = freeze_processes(); + if (error) + goto exit; + + lock_device_hotplug(); + + pm_suspend_clear_flags(); + + error = platform_begin(true); + if (error) + goto thaw; + + error = freeze_kernel_threads(); + if (error) + goto thaw; + + error = dpm_prepare(PMSG_FREEZE); + if (error) + goto dpm_complete; + + suspend_console(); + + error = dpm_suspend(PMSG_FREEZE); + if (error) + goto dpm_resume; + + error = dpm_suspend_end(PMSG_FREEZE); + if (error) + goto dpm_resume; + + error = platform_pre_snapshot(true); + if (error) + goto skip; + + error = func(data); + +skip: + platform_finish(true); + + dpm_resume_start(PMSG_THAW); + +dpm_resume: + dpm_resume(PMSG_THAW); + + resume_console(); + +dpm_complete: + dpm_complete(PMSG_THAW); + + thaw_kernel_threads(); + +thaw: + platform_end(true); + + unlock_device_hotplug(); + + thaw_processes(); + +exit: + __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); + + pm_restore_console(); + + hibernate_release(); + +unlock: + unlock_system_sleep(); + + return error; +} +EXPORT_SYMBOL_GPL(hibernate_quiet_exec); /** * software_resume - Resume from a saved hibernation image. From a1facc1fffc17a65e2c12a8de7434b9325ec0324 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 20 Jul 2020 15:08:24 -0700 Subject: [PATCH 14/19] ACPI: NFIT: Add runtime firmware activate support Plumb the platform specific backend for the generic libnvdimm firmware activate interface. Register dimm level operations to arm/disarm activation, and register bus level operations to report the dynamic platform-quiesce time relative to the number of dimms armed for firmware activation. A new nfit-specific bus attribute "firmware_activate_noidle" is added to allow the activation to switch between platform enforced, and OS opportunistic device quiesce. In other words, let the hibernate cycle handle in-flight device-dma rather than the platform attempting to increase PCI-E timeouts and the like. Cc: Dave Jiang Cc: Ira Weiny Cc: Vishal Verma Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- Documentation/ABI/testing/sysfs-bus-nfit | 19 ++ drivers/acpi/nfit/core.c | 41 ++- drivers/acpi/nfit/intel.c | 386 +++++++++++++++++++++++ drivers/acpi/nfit/intel.h | 3 + drivers/acpi/nfit/nfit.h | 10 + drivers/nvdimm/dimm_devs.c | 4 +- include/linux/libnvdimm.h | 5 +- 7 files changed, 461 insertions(+), 7 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit index a1cb44dcb908..e4f76e7eab93 100644 --- a/Documentation/ABI/testing/sysfs-bus-nfit +++ b/Documentation/ABI/testing/sysfs-bus-nfit @@ -202,6 +202,25 @@ Description: functions. See the section named 'NVDIMM Root Device _DSMs' in the ACPI specification. +What: /sys/bus/nd/devices/ndbusX/nfit/firmware_activate_noidle +Date: Apr, 2020 +KernelVersion: v5.8 +Contact: linux-nvdimm@lists.01.org +Description: + (RW) The Intel platform implementation of firmware activate + support exposes an option let the platform force idle devices in + the system over the activation event, or trust that the OS will + do it. The safe default is to let the platform force idle + devices since the kernel is already in a suspend state, and on + the chance that a driver does not properly quiesce bus-mastering + after a suspend callback the platform will handle it. However, + the activation might abort if, for example, platform firmware + determines that the activation time exceeds the max PCI-E + completion timeout. Since the platform does not know whether the + OS is running the activation from a suspend context it aborts, + but if the system owner trusts driver suspend callback to be + sufficient then 'firmware_activation_noidle' can be + enabled to bypass the activation abort. What: /sys/bus/nd/devices/regionX/nfit/range_index Date: Jun, 2015 diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 78cc9e2d2aa3..fb775b967c52 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1392,8 +1392,12 @@ static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n) struct device *dev = container_of(kobj, struct device, kobj); struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); - if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus)) - return 0; + if (a == &dev_attr_scrub.attr) + return ars_supported(nvdimm_bus) ? a->mode : 0; + + if (a == &dev_attr_firmware_activate_noidle.attr) + return intel_fwa_supported(nvdimm_bus) ? a->mode : 0; + return a->mode; } @@ -1402,6 +1406,7 @@ static struct attribute *acpi_nfit_attributes[] = { &dev_attr_scrub.attr, &dev_attr_hw_error_scrub.attr, &dev_attr_bus_dsm_mask.attr, + &dev_attr_firmware_activate_noidle.attr, NULL, }; @@ -2019,6 +2024,26 @@ static const struct nvdimm_security_ops *acpi_nfit_get_security_ops(int family) } } +static const struct nvdimm_fw_ops *acpi_nfit_get_fw_ops( + struct nfit_mem *nfit_mem) +{ + unsigned long mask; + struct acpi_nfit_desc *acpi_desc = nfit_mem->acpi_desc; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + + if (!nd_desc->fw_ops) + return NULL; + + if (nfit_mem->family != NVDIMM_FAMILY_INTEL) + return NULL; + + mask = nfit_mem->dsm_mask & NVDIMM_INTEL_FW_ACTIVATE_CMDMASK; + if (mask != NVDIMM_INTEL_FW_ACTIVATE_CMDMASK) + return NULL; + + return intel_fw_ops; +} + static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; @@ -2095,7 +2120,8 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) acpi_nfit_dimm_attribute_groups, flags, cmd_mask, flush ? flush->hint_count : 0, nfit_mem->flush_wpq, &nfit_mem->id[0], - acpi_nfit_get_security_ops(nfit_mem->family)); + acpi_nfit_get_security_ops(nfit_mem->family), + acpi_nfit_get_fw_ops(nfit_mem)); if (!nvdimm) return -ENOMEM; @@ -2170,8 +2196,10 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (acpi_desc->bus_cmd_force_en) { nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; mask = &nd_desc->bus_family_mask; - if (acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]) + if (acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]) { set_bit(NVDIMM_BUS_FAMILY_INTEL, mask); + nd_desc->fw_ops = intel_bus_fw_ops; + } } adev = to_acpi_dev(acpi_desc); @@ -2202,6 +2230,11 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) set_bit(i, mask); + + if (*mask == dsm_mask) { + set_bit(NVDIMM_BUS_FAMILY_INTEL, &nd_desc->bus_family_mask); + nd_desc->fw_ops = intel_bus_fw_ops; + } } static ssize_t range_index_show(struct device *dev, diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c index 1113b679cd7b..8dd792a55730 100644 --- a/drivers/acpi/nfit/intel.c +++ b/drivers/acpi/nfit/intel.c @@ -7,6 +7,48 @@ #include "intel.h" #include "nfit.h" +static ssize_t firmware_activate_noidle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + return sprintf(buf, "%s\n", acpi_desc->fwa_noidle ? "Y" : "N"); +} + +static ssize_t firmware_activate_noidle_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + ssize_t rc; + bool val; + + rc = kstrtobool(buf, &val); + if (rc) + return rc; + if (val != acpi_desc->fwa_noidle) + acpi_desc->fwa_cap = NVDIMM_FWA_CAP_INVALID; + acpi_desc->fwa_noidle = val; + return size; +} +DEVICE_ATTR_RW(firmware_activate_noidle); + +bool intel_fwa_supported(struct nvdimm_bus *nvdimm_bus) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + unsigned long *mask; + + if (!test_bit(NVDIMM_BUS_FAMILY_INTEL, &nd_desc->bus_family_mask)) + return false; + + mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL]; + return *mask == NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK; +} + static unsigned long intel_security_flags(struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype) { @@ -389,3 +431,347 @@ static const struct nvdimm_security_ops __intel_security_ops = { }; const struct nvdimm_security_ops *intel_security_ops = &__intel_security_ops; + +static int intel_bus_fwa_businfo(struct nvdimm_bus_descriptor *nd_desc, + struct nd_intel_bus_fw_activate_businfo *info) +{ + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_bus_fw_activate_businfo cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, + .nd_family = NVDIMM_BUS_FAMILY_INTEL, + .nd_size_out = + sizeof(struct nd_intel_bus_fw_activate_businfo), + .nd_fw_size = + sizeof(struct nd_intel_bus_fw_activate_businfo), + }, + }; + int rc; + + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), + NULL); + *info = nd_cmd.cmd; + return rc; +} + +/* The fw_ops expect to be called with the nvdimm_bus_lock() held */ +static enum nvdimm_fwa_state intel_bus_fwa_state( + struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_intel_bus_fw_activate_businfo info; + struct device *dev = acpi_desc->dev; + enum nvdimm_fwa_state state; + int rc; + + /* + * It should not be possible for platform firmware to return + * busy because activate is a synchronous operation. Treat it + * similar to invalid, i.e. always refresh / poll the status. + */ + switch (acpi_desc->fwa_state) { + case NVDIMM_FWA_INVALID: + case NVDIMM_FWA_BUSY: + break; + default: + /* check if capability needs to be refreshed */ + if (acpi_desc->fwa_cap == NVDIMM_FWA_CAP_INVALID) + break; + return acpi_desc->fwa_state; + } + + /* Refresh with platform firmware */ + rc = intel_bus_fwa_businfo(nd_desc, &info); + if (rc) + return NVDIMM_FWA_INVALID; + + switch (info.state) { + case ND_INTEL_FWA_IDLE: + state = NVDIMM_FWA_IDLE; + break; + case ND_INTEL_FWA_BUSY: + state = NVDIMM_FWA_BUSY; + break; + case ND_INTEL_FWA_ARMED: + if (info.activate_tmo > info.max_quiesce_tmo) + state = NVDIMM_FWA_ARM_OVERFLOW; + else + state = NVDIMM_FWA_ARMED; + break; + default: + dev_err_once(dev, "invalid firmware activate state %d\n", + info.state); + return NVDIMM_FWA_INVALID; + } + + /* + * Capability data is available in the same payload as state. It + * is expected to be static. + */ + if (acpi_desc->fwa_cap == NVDIMM_FWA_CAP_INVALID) { + if (info.capability & ND_INTEL_BUS_FWA_CAP_FWQUIESCE) + acpi_desc->fwa_cap = NVDIMM_FWA_CAP_QUIESCE; + else if (info.capability & ND_INTEL_BUS_FWA_CAP_OSQUIESCE) { + /* + * Skip hibernate cycle by default if platform + * indicates that it does not need devices to be + * quiesced. + */ + acpi_desc->fwa_cap = NVDIMM_FWA_CAP_LIVE; + } else + acpi_desc->fwa_cap = NVDIMM_FWA_CAP_NONE; + } + + acpi_desc->fwa_state = state; + + return state; +} + +static enum nvdimm_fwa_capability intel_bus_fwa_capability( + struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + if (acpi_desc->fwa_cap > NVDIMM_FWA_CAP_INVALID) + return acpi_desc->fwa_cap; + + if (intel_bus_fwa_state(nd_desc) > NVDIMM_FWA_INVALID) + return acpi_desc->fwa_cap; + + return NVDIMM_FWA_CAP_INVALID; +} + +static int intel_bus_fwa_activate(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_bus_fw_activate cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE, + .nd_family = NVDIMM_BUS_FAMILY_INTEL, + .nd_size_in = sizeof(nd_cmd.cmd.iodev_state), + .nd_size_out = + sizeof(struct nd_intel_bus_fw_activate), + .nd_fw_size = + sizeof(struct nd_intel_bus_fw_activate), + }, + /* + * Even though activate is run from a suspended context, + * for safety, still ask platform firmware to force + * quiesce devices by default. Let a module + * parameter override that policy. + */ + .cmd = { + .iodev_state = acpi_desc->fwa_noidle + ? ND_INTEL_BUS_FWA_IODEV_OS_IDLE + : ND_INTEL_BUS_FWA_IODEV_FORCE_IDLE, + }, + }; + int rc; + + switch (intel_bus_fwa_state(nd_desc)) { + case NVDIMM_FWA_ARMED: + case NVDIMM_FWA_ARM_OVERFLOW: + break; + default: + return -ENXIO; + } + + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), + NULL); + + /* + * Whether the command succeeded, or failed, the agent checking + * for the result needs to query the DIMMs individually. + * Increment the activation count to invalidate all the DIMM + * states at once (it's otherwise not possible to take + * acpi_desc->init_mutex in this context) + */ + acpi_desc->fwa_state = NVDIMM_FWA_INVALID; + acpi_desc->fwa_count++; + + dev_dbg(acpi_desc->dev, "result: %d\n", rc); + + return rc; +} + +static const struct nvdimm_bus_fw_ops __intel_bus_fw_ops = { + .activate_state = intel_bus_fwa_state, + .capability = intel_bus_fwa_capability, + .activate = intel_bus_fwa_activate, +}; + +const struct nvdimm_bus_fw_ops *intel_bus_fw_ops = &__intel_bus_fw_ops; + +static int intel_fwa_dimminfo(struct nvdimm *nvdimm, + struct nd_intel_fw_activate_dimminfo *info) +{ + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_fw_activate_dimminfo cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_out = + sizeof(struct nd_intel_fw_activate_dimminfo), + .nd_fw_size = + sizeof(struct nd_intel_fw_activate_dimminfo), + }, + }; + int rc; + + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + *info = nd_cmd.cmd; + return rc; +} + +static enum nvdimm_fwa_state intel_fwa_state(struct nvdimm *nvdimm) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_desc *acpi_desc = nfit_mem->acpi_desc; + struct nd_intel_fw_activate_dimminfo info; + int rc; + + /* + * Similar to the bus state, since activate is synchronous the + * busy state should resolve within the context of 'activate'. + */ + switch (nfit_mem->fwa_state) { + case NVDIMM_FWA_INVALID: + case NVDIMM_FWA_BUSY: + break; + default: + /* If no activations occurred the old state is still valid */ + if (nfit_mem->fwa_count == acpi_desc->fwa_count) + return nfit_mem->fwa_state; + } + + rc = intel_fwa_dimminfo(nvdimm, &info); + if (rc) + return NVDIMM_FWA_INVALID; + + switch (info.state) { + case ND_INTEL_FWA_IDLE: + nfit_mem->fwa_state = NVDIMM_FWA_IDLE; + break; + case ND_INTEL_FWA_BUSY: + nfit_mem->fwa_state = NVDIMM_FWA_BUSY; + break; + case ND_INTEL_FWA_ARMED: + nfit_mem->fwa_state = NVDIMM_FWA_ARMED; + break; + default: + nfit_mem->fwa_state = NVDIMM_FWA_INVALID; + break; + } + + switch (info.result) { + case ND_INTEL_DIMM_FWA_NONE: + nfit_mem->fwa_result = NVDIMM_FWA_RESULT_NONE; + break; + case ND_INTEL_DIMM_FWA_SUCCESS: + nfit_mem->fwa_result = NVDIMM_FWA_RESULT_SUCCESS; + break; + case ND_INTEL_DIMM_FWA_NOTSTAGED: + nfit_mem->fwa_result = NVDIMM_FWA_RESULT_NOTSTAGED; + break; + case ND_INTEL_DIMM_FWA_NEEDRESET: + nfit_mem->fwa_result = NVDIMM_FWA_RESULT_NEEDRESET; + break; + case ND_INTEL_DIMM_FWA_MEDIAFAILED: + case ND_INTEL_DIMM_FWA_ABORT: + case ND_INTEL_DIMM_FWA_NOTSUPP: + case ND_INTEL_DIMM_FWA_ERROR: + default: + nfit_mem->fwa_result = NVDIMM_FWA_RESULT_FAIL; + break; + } + + nfit_mem->fwa_count = acpi_desc->fwa_count; + + return nfit_mem->fwa_state; +} + +static enum nvdimm_fwa_result intel_fwa_result(struct nvdimm *nvdimm) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_desc *acpi_desc = nfit_mem->acpi_desc; + + if (nfit_mem->fwa_count == acpi_desc->fwa_count + && nfit_mem->fwa_result > NVDIMM_FWA_RESULT_INVALID) + return nfit_mem->fwa_result; + + if (intel_fwa_state(nvdimm) > NVDIMM_FWA_INVALID) + return nfit_mem->fwa_result; + + return NVDIMM_FWA_RESULT_INVALID; +} + +static int intel_fwa_arm(struct nvdimm *nvdimm, enum nvdimm_fwa_trigger arm) +{ + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_desc *acpi_desc = nfit_mem->acpi_desc; + struct { + struct nd_cmd_pkg pkg; + struct nd_intel_fw_activate_arm cmd; + } nd_cmd = { + .pkg = { + .nd_command = NVDIMM_INTEL_FW_ACTIVATE_ARM, + .nd_family = NVDIMM_FAMILY_INTEL, + .nd_size_in = sizeof(nd_cmd.cmd.activate_arm), + .nd_size_out = + sizeof(struct nd_intel_fw_activate_arm), + .nd_fw_size = + sizeof(struct nd_intel_fw_activate_arm), + }, + .cmd = { + .activate_arm = arm == NVDIMM_FWA_ARM + ? ND_INTEL_DIMM_FWA_ARM + : ND_INTEL_DIMM_FWA_DISARM, + }, + }; + int rc; + + switch (intel_fwa_state(nvdimm)) { + case NVDIMM_FWA_INVALID: + return -ENXIO; + case NVDIMM_FWA_BUSY: + return -EBUSY; + case NVDIMM_FWA_IDLE: + if (arm == NVDIMM_FWA_DISARM) + return 0; + break; + case NVDIMM_FWA_ARMED: + if (arm == NVDIMM_FWA_ARM) + return 0; + break; + default: + return -ENXIO; + } + + /* + * Invalidate the bus-level state, now that we're committed to + * changing the 'arm' state. + */ + acpi_desc->fwa_state = NVDIMM_FWA_INVALID; + nfit_mem->fwa_state = NVDIMM_FWA_INVALID; + + rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); + + dev_dbg(acpi_desc->dev, "%s result: %d\n", arm == NVDIMM_FWA_ARM + ? "arm" : "disarm", rc); + return rc; +} + +static const struct nvdimm_fw_ops __intel_fw_ops = { + .activate_state = intel_fwa_state, + .activate_result = intel_fwa_result, + .arm = intel_fwa_arm, +}; + +const struct nvdimm_fw_ops *intel_fw_ops = &__intel_fw_ops; diff --git a/drivers/acpi/nfit/intel.h b/drivers/acpi/nfit/intel.h index 49a598623024..b768234ccebc 100644 --- a/drivers/acpi/nfit/intel.h +++ b/drivers/acpi/nfit/intel.h @@ -169,4 +169,7 @@ struct nd_intel_bus_fw_activate { u8 iodev_state; u32 status; } __packed; + +extern const struct nvdimm_fw_ops *intel_fw_ops; +extern const struct nvdimm_bus_fw_ops *intel_bus_fw_ops; #endif diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 97c122628975..67b7807ed200 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -220,6 +220,9 @@ struct nfit_mem { struct list_head list; struct acpi_device *adev; struct acpi_nfit_desc *acpi_desc; + enum nvdimm_fwa_state fwa_state; + enum nvdimm_fwa_result fwa_result; + int fwa_count; char id[NFIT_DIMM_ID_LEN+1]; struct resource *flush_wpq; unsigned long dsm_mask; @@ -265,6 +268,11 @@ struct acpi_nfit_desc { unsigned int scrub_tmo; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); + enum nvdimm_fwa_state fwa_state; + enum nvdimm_fwa_capability fwa_cap; + int fwa_count; + bool fwa_noidle; + bool fwa_nosuspend; }; enum scrub_mode { @@ -367,4 +375,6 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event); int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); +bool intel_fwa_supported(struct nvdimm_bus *nvdimm_bus); +extern struct device_attribute dev_attr_firmware_activate_noidle; #endif /* __NFIT_H__ */ diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 85b53a7f44f2..2f0815e15986 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -582,7 +582,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, struct resource *flush_wpq, const char *dimm_id, - const struct nvdimm_security_ops *sec_ops) + const struct nvdimm_security_ops *sec_ops, + const struct nvdimm_fw_ops *fw_ops) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -612,6 +613,7 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, dev->devt = MKDEV(nvdimm_major, nvdimm->id); dev->groups = groups; nvdimm->sec.ops = sec_ops; + nvdimm->fw_ops = fw_ops; nvdimm->sec.overwrite_tmo = 0; INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query); /* diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 15dbcb718316..01f251b6e36c 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -269,14 +269,15 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, struct resource *flush_wpq, const char *dimm_id, - const struct nvdimm_security_ops *sec_ops); + const struct nvdimm_security_ops *sec_ops, + const struct nvdimm_fw_ops *fw_ops); static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, struct resource *flush_wpq) { return __nvdimm_create(nvdimm_bus, provider_data, groups, flags, - cmd_mask, num_flush, flush_wpq, NULL, NULL); + cmd_mask, num_flush, flush_wpq, NULL, NULL, NULL); } const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); From 49688e654e48ae176b5bfda33d2416b68e607ce8 Mon Sep 17 00:00:00 2001 From: Hao Li Date: Wed, 29 Jul 2020 11:44:36 +0800 Subject: [PATCH 15/19] dax: Fix incorrect argument passed to xas_set_err() The argument passed to xas_set_err() to indicate an error should be negative. Otherwise, xas_error() will return 0, and grab_mapping_entry() will return the found entry instead of 'SIGBUS' when the entry is not in fact valid. This would result in problems in subsequent code paths. Link: https://lore.kernel.org/r/20200729034436.24267-1-lihao2018.fnst@cn.fujitsu.com Reviewed-by: Pankaj Gupta Signed-off-by: Hao Li Signed-off-by: Vishal Verma --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 3e0babeb0365..95341af1a966 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -488,7 +488,7 @@ static void *grab_mapping_entry(struct xa_state *xas, if (dax_is_conflict(entry)) goto fallback; if (!xa_is_value(entry)) { - xas_set_err(xas, EIO); + xas_set_err(xas, -EIO); goto out_unlock; } From 9f1048d47e8ae4cc21e80facf2ba4b01a6623b67 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 31 Jul 2020 18:38:26 -0700 Subject: [PATCH 16/19] ACPI: NFIT: Fix ARS zero-sized allocation Pending commit in -next "devres: handle zero size in devm_kmalloc()" triggers a boot regression due to the ARS implementation expecting NULL from a zero-sized allocation. Avoid the zero-sized allocation by skipping ARS, otherwise crashes with the following signature when de-referencing ZERO_SIZE_PTR. BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page RIP: 0010:__acpi_nfit_scrub+0x28a/0x350 [nfit] [..] Call Trace: ? acpi_nfit_query_poison+0x6a/0x180 [nfit] acpi_nfit_scrub+0x36/0xb0 [nfit] process_one_work+0x23c/0x580 worker_thread+0x50/0x3b0 Otherwise the implementation correctly aborts when NULL is returned from devm_kzalloc() in ars_status_alloc(). Link: https://lore.kernel.org/r/159624590643.3037264.14157533719042907758.stgit@dwillia2-desk3.amr.corp.intel.com Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Signed-off-by: Vishal Verma --- drivers/acpi/nfit/core.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fb775b967c52..26dd208a0d63 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3334,7 +3334,7 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc, static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) { struct nfit_spa *nfit_spa; - int rc; + int rc, do_sched_ars = 0; set_bit(ARS_VALID, &acpi_desc->scrub_flags); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { @@ -3346,7 +3346,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) } } - list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { switch (nfit_spa_type(nfit_spa->spa)) { case NFIT_SPA_VOLATILE: case NFIT_SPA_PM: @@ -3354,6 +3354,13 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) rc = ars_register(acpi_desc, nfit_spa); if (rc) return rc; + + /* + * Kick off background ARS if at least one + * region successfully registered ARS + */ + if (!test_bit(ARS_FAILED, &nfit_spa->ars_state)) + do_sched_ars++; break; case NFIT_SPA_BDW: /* nothing to register */ @@ -3372,8 +3379,10 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) /* don't register unknown regions */ break; } + } - sched_ars(acpi_desc); + if (do_sched_ars) + sched_ars(acpi_desc); return 0; } From dad42d17558f316e9e807698cd4207359b636084 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 3 Aug 2020 16:41:37 -0600 Subject: [PATCH 17/19] libnvdimm/security: fix a typo commit d78c620a2e82 ("libnvdimm/security: Introduce a 'frozen' attribute") introduced a typo, causing a 'nvdimm->sec.flags' update being overwritten by the subsequent update meant for 'nvdimm->sec.ext_flags'. Link: https://lore.kernel.org/r/1596494499-9852-1-git-send-email-jane.chu@oracle.com Fixes: d78c620a2e82 ("libnvdimm/security: Introduce a 'frozen' attribute") Cc: Dan Williams Reviewed-by: Dave Jiang Signed-off-by: Jane Chu Signed-off-by: Vishal Verma --- drivers/nvdimm/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 89b85970912d..acfd211c01b9 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -457,7 +457,7 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm) clear_bit(NDD_WORK_PENDING, &nvdimm->flags); put_device(&nvdimm->dev); nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); - nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); } void nvdimm_security_overwrite_query(struct work_struct *work) From 7c02d53dfe4db08cd7b4d4be8c7f1b801cf8b899 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 3 Aug 2020 16:41:38 -0600 Subject: [PATCH 18/19] libnvdimm/security: the 'security' attr never show 'overwrite' state 'security' attribute displays the security state of an nvdimm. During normal operation, the nvdimm state maybe one of 'disabled', 'unlocked' or 'locked'. When an admin issues # ndctl sanitize-dimm nmem0 --overwrite the attribute is expected to change to 'overwrite' until the overwrite operation completes. But tests on our systems show that 'overwrite' is never shown during the overwrite operation. i.e. # cat /sys/devices/LNXSYSTM:00/LNXSYBUS:00/ACPI0012:00/ndbus0/nmem0/security unlocked the attribute remain 'unlocked' through out the operation, consequently "ndctl wait-overwrite nmem0" command doesn't wait at all. The driver tracks the state in 'nvdimm->sec.flags': when the operation starts, it adds an overwrite bit to the flags; and when the operation completes, it removes the bit. Hence security_show() should check the 'overwrite' bit first, in order to indicate the actual state when multiple bits are set in the flags. Link: https://lore.kernel.org/r/1596494499-9852-2-git-send-email-jane.chu@oracle.com Reviewed-by: Dave Jiang Signed-off-by: Jane Chu Signed-off-by: Vishal Verma --- drivers/nvdimm/dimm_devs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 2f0815e15986..61374def5155 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -363,14 +363,14 @@ __weak ssize_t security_show(struct device *dev, { struct nvdimm *nvdimm = to_nvdimm(dev); + if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags)) + return sprintf(buf, "overwrite\n"); if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) return sprintf(buf, "disabled\n"); if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) return sprintf(buf, "unlocked\n"); if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) return sprintf(buf, "locked\n"); - if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags)) - return sprintf(buf, "overwrite\n"); return -ENOTTY; } From 7f674025d9f7321dea11b802cc0ab3f09cbe51c5 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 3 Aug 2020 16:41:39 -0600 Subject: [PATCH 19/19] libnvdimm/security: ensure sysfs poll thread woke up and fetch updated attr commit 7d988097c546 ("acpi/nfit, libnvdimm/security: Add security DSM overwrite support") adds a sysfs_notify_dirent() to wake up userspace poll thread when the "overwrite" operation has completed. But the notification is issued before the internal dimm security state and flags have been updated, so the userspace poll thread wakes up and fetches the not-yet-updated attr and falls back to sleep, forever. But if user from another terminal issue "ndctl wait-overwrite nmemX" again, the command returns instantly. Link: https://lore.kernel.org/r/1596494499-9852-3-git-send-email-jane.chu@oracle.com Fixes: 7d988097c546 ("acpi/nfit, libnvdimm/security: Add security DSM overwrite support") Cc: Dave Jiang Cc: Dan Williams Reviewed-by: Dave Jiang Signed-off-by: Jane Chu Signed-off-by: Vishal Verma --- drivers/nvdimm/security.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index acfd211c01b9..35d265014e1e 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -450,14 +450,19 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm) else dev_dbg(&nvdimm->dev, "overwrite completed\n"); - if (nvdimm->sec.overwrite_state) - sysfs_notify_dirent(nvdimm->sec.overwrite_state); + /* + * Mark the overwrite work done and update dimm security flags, + * then send a sysfs event notification to wake up userspace + * poll threads to picked up the changed state. + */ nvdimm->sec.overwrite_tmo = 0; clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags); clear_bit(NDD_WORK_PENDING, &nvdimm->flags); - put_device(&nvdimm->dev); nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + if (nvdimm->sec.overwrite_state) + sysfs_notify_dirent(nvdimm->sec.overwrite_state); + put_device(&nvdimm->dev); } void nvdimm_security_overwrite_query(struct work_struct *work)