From 0a8459693238a339de9705da4e26f6ffbb6a4ebc Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 22 Aug 2019 11:06:43 +0200 Subject: fbdev: drop res_id parameter from remove_conflicting_pci_framebuffers Since commit b0e999c95581 ("fbdev: list all pci memory bars as conflicting apertures") the parameter was used for some sanity checks only, to make sure we detect any issues with the new approach to just list all memory bars as apertures. No issues turned up so far, so continue to cleanup: Drop the res_id parameter, drop the sanity checks. Also downgrade the logging from "info" level to "debug" level and update documentation. Signed-off-by: Gerd Hoffmann Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20190822090645.25410-2-kraxel@redhat.com --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 756706b666a1..41e0069eca0a 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -607,7 +607,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); extern void unlink_framebuffer(struct fb_info *fb_info); -extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, int res_id, +extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary); -- cgit v1.2.3 From c88c8cd8265a9c7c2bf57350ab9c64d89c7b596b Mon Sep 17 00:00:00 2001 From: Mircea Caprioru Date: Mon, 2 Sep 2019 16:08:29 +0300 Subject: iio: adc: ad_sigma_delta: Export ad_sd_calibrate This patch exports the ad_sd_calibrate function in order to be able to call it from outside ad_sigma_delta. There are cases where the option to calibrate one channel at a time is necessary (ex. system calibration for zero scale and full scale). Signed-off-by: Mircea Caprioru Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7716fa0c9fce..8a4e25a7080c 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -119,6 +119,8 @@ int ad_sd_reset(struct ad_sigma_delta *sigma_delta, int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); +int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, + unsigned int mode, unsigned int channel); int ad_sd_calibrate_all(struct ad_sigma_delta *sigma_delta, const struct ad_sd_calib_data *cd, unsigned int n); int ad_sd_init(struct ad_sigma_delta *sigma_delta, struct iio_dev *indio_dev, -- cgit v1.2.3 From ff92741270bf8b6e78aa885f166b68c7a67ab13a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Sep 2019 11:48:15 +0200 Subject: net: introduce name_node struct to be used in hashlist Introduce name_node structure to hold name of device and put it into hashlist instead of putting there struct net_device directly. Add a necessary infrastructure to manipulate the hashlist. This prepares the code to use the same hashlist for alternative names introduced later in this set. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9eda1c31d1f7..e92bc5467256 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -925,6 +925,12 @@ struct dev_ifalias { struct devlink; struct tlsdev_ops; +struct netdev_name_node { + struct hlist_node hlist; + struct net_device *dev; + const char *name; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1564,7 +1570,7 @@ enum netdev_priv_flags { * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. * - * @name_hlist: Device name hash chain, please keep it close to name[] + * @name_node: Name hashlist node * @ifalias: SNMP alias * @mem_end: Shared memory end * @mem_start: Shared memory start @@ -1774,7 +1780,7 @@ enum netdev_priv_flags { struct net_device { char name[IFNAMSIZ]; - struct hlist_node name_hlist; + struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; /* * I/O specific fields -- cgit v1.2.3 From 36fbf1e52bd3ff8a5cb604955eedfc9350c2e6cc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Sep 2019 11:48:16 +0200 Subject: net: rtnetlink: add linkprop commands to add and delete alternative ifnames Add two commands to add and delete list of link properties. Implement the first property type along - alternative ifnames. Each net device can have multiple alternative names. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e92bc5467256..48cc71aae466 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -927,10 +927,14 @@ struct tlsdev_ops; struct netdev_name_node { struct hlist_node hlist; + struct list_head list; struct net_device *dev; const char *name; }; +int netdev_name_node_alt_create(struct net_device *dev, const char *name); +int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are -- cgit v1.2.3 From a14b820316e84310b1bad3701a8d4c9159377633 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 18 Jul 2019 18:32:36 +0530 Subject: soc: qcom: llcc cleanup to get rid of sdm845 specific driver file A single file should suffice the need to program the llcc for various platforms. Get rid of sdm845 specific driver file to make way for a more generic driver. Signed-off-by: Vivek Gautam Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 57 ++++++++++++++------------------------ 1 file changed, 21 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index eb71a50b8afc..d5cad6f7953c 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -39,18 +39,27 @@ struct llcc_slice_desc { /** * llcc_slice_config - Data associated with the llcc slice - * @usecase_id: usecase id for which the llcc slice is used - * @slice_id: llcc slice id assigned to each slice - * @max_cap: maximum capacity of the llcc slice - * @priority: priority of the llcc slice - * @fixed_size: whether the llcc slice can grow beyond its size - * @bonus_ways: bonus ways associated with llcc slice - * @res_ways: reserved ways associated with llcc slice - * @cache_mode: mode of the llcc slice - * @probe_target_ways: Probe only reserved and bonus ways on a cache miss - * @dis_cap_alloc: Disable capacity based allocation - * @retain_on_pc: Retain through power collapse - * @activate_on_init: activate the slice on init + * @usecase_id: Unique id for the client's use case + * @slice_id: llcc slice id for each client + * @max_cap: The maximum capacity of the cache slice provided in KB + * @priority: Priority of the client used to select victim line for replacement + * @fixed_size: Boolean indicating if the slice has a fixed capacity + * @bonus_ways: Bonus ways are additional ways to be used for any slice, + * if client ends up using more than reserved cache ways. Bonus + * ways are allocated only if they are not reserved for some + * other client. + * @res_ways: Reserved ways for the cache slice, the reserved ways cannot + * be used by any other client than the one its assigned to. + * @cache_mode: Each slice operates as a cache, this controls the mode of the + * slice: normal or TCM(Tightly Coupled Memory) + * @probe_target_ways: Determines what ways to probe for access hit. When + * configured to 1 only bonus and reserved ways are probed. + * When configured to 0 all ways in llcc are probed. + * @dis_cap_alloc: Disable capacity based allocation for a client + * @retain_on_pc: If this bit is set and client has maintained active vote + * then the ways assigned to this client are not flushed on power + * collapse. + * @activate_on_init: Activate the slice immediately after it is programmed */ struct llcc_slice_config { u32 usecase_id; @@ -154,20 +163,6 @@ int llcc_slice_activate(struct llcc_slice_desc *desc); */ int llcc_slice_deactivate(struct llcc_slice_desc *desc); -/** - * qcom_llcc_probe - program the sct table - * @pdev: platform device pointer - * @table: soc sct table - * @sz: Size of the config table - */ -int qcom_llcc_probe(struct platform_device *pdev, - const struct llcc_slice_config *table, u32 sz); - -/** - * qcom_llcc_remove - remove the sct table - * @pdev: Platform device pointer - */ -int qcom_llcc_remove(struct platform_device *pdev); #else static inline struct llcc_slice_desc *llcc_slice_getd(u32 uid) { @@ -197,16 +192,6 @@ static inline int llcc_slice_deactivate(struct llcc_slice_desc *desc) { return -EINVAL; } -static inline int qcom_llcc_probe(struct platform_device *pdev, - const struct llcc_slice_config *table, u32 sz) -{ - return -ENODEV; -} - -static inline int qcom_llcc_remove(struct platform_device *pdev) -{ - return -ENODEV; -} #endif #endif -- cgit v1.2.3 From 99356b03b431f9589bbaec2bc5bacceccb3dd99a Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 18 Jul 2019 18:32:38 +0530 Subject: soc: qcom: Make llcc-qcom a generic driver This makes way for adding future llcc versions. Also pull out the llcc-qcom specific definitions from includes. Includes path now contains the only definitions that are to be exposed to other subsystems. Signed-off-by: Vivek Gautam Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 89 -------------------------------------- 1 file changed, 89 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index d5cad6f7953c..c0acdb28fde8 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -37,95 +37,6 @@ struct llcc_slice_desc { size_t slice_size; }; -/** - * llcc_slice_config - Data associated with the llcc slice - * @usecase_id: Unique id for the client's use case - * @slice_id: llcc slice id for each client - * @max_cap: The maximum capacity of the cache slice provided in KB - * @priority: Priority of the client used to select victim line for replacement - * @fixed_size: Boolean indicating if the slice has a fixed capacity - * @bonus_ways: Bonus ways are additional ways to be used for any slice, - * if client ends up using more than reserved cache ways. Bonus - * ways are allocated only if they are not reserved for some - * other client. - * @res_ways: Reserved ways for the cache slice, the reserved ways cannot - * be used by any other client than the one its assigned to. - * @cache_mode: Each slice operates as a cache, this controls the mode of the - * slice: normal or TCM(Tightly Coupled Memory) - * @probe_target_ways: Determines what ways to probe for access hit. When - * configured to 1 only bonus and reserved ways are probed. - * When configured to 0 all ways in llcc are probed. - * @dis_cap_alloc: Disable capacity based allocation for a client - * @retain_on_pc: If this bit is set and client has maintained active vote - * then the ways assigned to this client are not flushed on power - * collapse. - * @activate_on_init: Activate the slice immediately after it is programmed - */ -struct llcc_slice_config { - u32 usecase_id; - u32 slice_id; - u32 max_cap; - u32 priority; - bool fixed_size; - u32 bonus_ways; - u32 res_ways; - u32 cache_mode; - u32 probe_target_ways; - bool dis_cap_alloc; - bool retain_on_pc; - bool activate_on_init; -}; - -/** - * llcc_drv_data - Data associated with the llcc driver - * @regmap: regmap associated with the llcc device - * @bcast_regmap: regmap associated with llcc broadcast offset - * @cfg: pointer to the data structure for slice configuration - * @lock: mutex associated with each slice - * @cfg_size: size of the config data table - * @max_slices: max slices as read from device tree - * @num_banks: Number of llcc banks - * @bitmap: Bit map to track the active slice ids - * @offsets: Pointer to the bank offsets array - * @ecc_irq: interrupt for llcc cache error detection and reporting - */ -struct llcc_drv_data { - struct regmap *regmap; - struct regmap *bcast_regmap; - const struct llcc_slice_config *cfg; - struct mutex lock; - u32 cfg_size; - u32 max_slices; - u32 num_banks; - unsigned long *bitmap; - u32 *offsets; - int ecc_irq; -}; - -/** - * llcc_edac_reg_data - llcc edac registers data for each error type - * @name: Name of the error - * @synd_reg: Syndrome register address - * @count_status_reg: Status register address to read the error count - * @ways_status_reg: Status register address to read the error ways - * @reg_cnt: Number of registers - * @count_mask: Mask value to get the error count - * @ways_mask: Mask value to get the error ways - * @count_shift: Shift value to get the error count - * @ways_shift: Shift value to get the error ways - */ -struct llcc_edac_reg_data { - char *name; - u64 synd_reg; - u64 count_status_reg; - u64 ways_status_reg; - u32 reg_cnt; - u32 count_mask; - u32 ways_mask; - u8 count_shift; - u8 ways_shift; -}; - #if IS_ENABLED(CONFIG_QCOM_LLCC) /** * llcc_slice_getd - get llcc slice descriptor -- cgit v1.2.3 From afa0df5998131153ec3036f41e76ece33bf1334f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Sep 2019 10:15:09 +0200 Subject: net: push loops and nb calls into helper functions Push iterations over net namespaces and netdevices from register_netdevice_notifier() and unregister_netdevice_notifier() into helper functions. Along with that introduce continue_reverse macros to make the code a bit nicer allowing to get rid of "last" marks. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 48cc71aae466..7b183f724fc4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2574,6 +2574,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue_reverse(net, d) \ + list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \ + dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ -- cgit v1.2.3 From a30c7b429f2dd980202c912fcb76442364937b4d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 30 Sep 2019 10:15:10 +0200 Subject: net: introduce per-netns netdevice notifiers Often the code for example in drivers is interested in getting notifier call only from certain network namespace. In addition to the existing global netdevice notifier chain introduce per-netns chains and allow users to register to that. Eventually this would eliminate unnecessary overhead in case there are many netdevices in many network namespaces. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7b183f724fc4..fe45b2c72315 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2504,6 +2504,9 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd); int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); +int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +int unregister_netdevice_notifier_net(struct net *net, + struct notifier_block *nb); struct netdev_notifier_info { struct net_device *dev; -- cgit v1.2.3 From 2d2f116d69c127099553afe0d87cf9c0bbe2759e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 12 Sep 2019 20:22:38 -0700 Subject: gpiolib: introduce devm_fwnode_gpiod_get_index() devm_fwnode_get_index_gpiod_from_child() is too long, besides the fwnode in question does not have to be a child of device node. Let's rename it to devm_fwnode_gpiod_get_index() and keep the old name for compatibility for now. Also let's add a devm_fwnode_gpiod_get() wrapper as majority of the callers need a single GPIO. Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Reviewed-by: Mika Westerberg Link: https://lore.kernel.org/r/20190913032240.50333-2-dmitry.torokhov@gmail.com Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index b70af921c614..dc0ddcd30515 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -176,11 +176,11 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, const char *propname, int index, enum gpiod_flags dflags, const char *label); -struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, - const char *con_id, int index, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label); +struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, + struct fwnode_handle *child, + const char *con_id, int index, + enum gpiod_flags flags, + const char *label); #else /* CONFIG_GPIOLIB */ @@ -531,6 +531,29 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, return ERR_PTR(-ENOSYS); } +static inline +struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, + struct fwnode_handle *fwnode, + const char *con_id, int index, + enum gpiod_flags flags, + const char *label) +{ + return ERR_PTR(-ENOSYS); +} + +#endif /* CONFIG_GPIOLIB */ + +static inline +struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, + struct fwnode_handle *fwnode, + const char *con_id, + enum gpiod_flags flags, + const char *label) +{ + return devm_fwnode_gpiod_get_index(dev, fwnode, con_id, 0, + flags, label); +} + static inline struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, const char *con_id, int index, @@ -538,11 +561,10 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, enum gpiod_flags flags, const char *label) { - return ERR_PTR(-ENOSYS); + return devm_fwnode_gpiod_get_index(dev, child, con_id, index, + flags, label); } -#endif /* CONFIG_GPIOLIB */ - static inline struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, const char *con_id, @@ -550,8 +572,7 @@ struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, enum gpiod_flags flags, const char *label) { - return devm_fwnode_get_index_gpiod_from_child(dev, con_id, 0, child, - flags, label); + return devm_fwnode_gpiod_get_index(dev, child, con_id, 0, flags, label); } #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO) -- cgit v1.2.3 From 13949fa9daa91a60c7cfef40755f7611cc2cf653 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 12 Sep 2019 20:22:39 -0700 Subject: gpiolib: introduce fwnode_gpiod_get_index() This introduces fwnode_gpiod_get_index() that iterates through common gpio suffixes when trying to locate a GPIO within a given firmware node. We also switch devm_fwnode_gpiod_get_index() to call fwnode_gpiod_get_index() instead of iterating through GPIO suffixes on its own. Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20190913032240.50333-3-dmitry.torokhov@gmail.com Reviewed-by: Mika Westerberg Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index dc0ddcd30515..5215fdba6b9a 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -176,6 +176,10 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, const char *propname, int index, enum gpiod_flags dflags, const char *label); +struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, + const char *con_id, int index, + enum gpiod_flags flags, + const char *label); struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, struct fwnode_handle *child, const char *con_id, int index, @@ -531,6 +535,15 @@ struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, return ERR_PTR(-ENOSYS); } +static inline +struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, + const char *con_id, int index, + enum gpiod_flags flags, + const char *label) +{ + return ERR_PTR(-ENOSYS); +} + static inline struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, struct fwnode_handle *fwnode, -- cgit v1.2.3 From 8cde3c2153e8f57be884c0e73f18bc4de150e870 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Wed, 31 Jul 2019 09:23:39 +0100 Subject: firmware: meson_sm: Rework driver as a proper platform driver The secure monitor driver is currently a frankenstein driver which is registered as a platform driver but its functionality goes through a global struct accessed by the consumer drivers using exported helper functions. Try to tidy up the driver moving the firmware struct into the driver data and make the consumer drivers referencing the secure-monitor using a new property in the DT. Currently only the nvmem driver is using this API so we can fix it in the same commit. Reviewed-by: Jerome Brunet Signed-off-by: Carlo Caione Signed-off-by: Kevin Hilman --- include/linux/firmware/meson/meson_sm.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 7613bf7c9442..6669e2a1d5fd 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -16,11 +16,14 @@ enum { struct meson_sm_firmware; -int meson_sm_call(unsigned int cmd_index, u32 *ret, u32 arg0, u32 arg1, - u32 arg2, u32 arg3, u32 arg4); -int meson_sm_call_write(void *buffer, unsigned int b_size, unsigned int cmd_index, - u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); -int meson_sm_call_read(void *buffer, unsigned int bsize, unsigned int cmd_index, - u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call(struct meson_sm_firmware *fw, unsigned int cmd_index, + u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call_write(struct meson_sm_firmware *fw, void *buffer, + unsigned int b_size, unsigned int cmd_index, u32 arg0, + u32 arg1, u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call_read(struct meson_sm_firmware *fw, void *buffer, + unsigned int bsize, unsigned int cmd_index, u32 arg0, + u32 arg1, u32 arg2, u32 arg3, u32 arg4); +struct meson_sm_firmware *meson_sm_get(struct device_node *firmware_node); #endif /* _MESON_SM_FW_H_ */ -- cgit v1.2.3 From 245d73698ed7abdc7e520dfa38048bb80ce89571 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 2 Oct 2019 16:41:58 -0700 Subject: audit: Report suspicious O_CREAT usage This renames the very specific audit_log_link_denied() to audit_log_path_denied() and adds the AUDIT_* type as an argument. This allows for the creation of the new AUDIT_ANOM_CREAT that can be used to report the fifo/regular file creation restrictions that were introduced in commit 30aba6656f61 ("namei: allow restricted O_CREAT of FIFOs and regular files"). Signed-off-by: Kees Cook Signed-off-by: Paul Moore --- include/linux/audit.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index aee3dc9eb378..f9ceae57ca8d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -156,7 +156,8 @@ extern void audit_log_d_path(struct audit_buffer *ab, const struct path *path); extern void audit_log_key(struct audit_buffer *ab, char *key); -extern void audit_log_link_denied(const char *operation); +extern void audit_log_path_denied(int type, + const char *operation); extern void audit_log_lost(const char *message); extern int audit_log_task_context(struct audit_buffer *ab); @@ -217,7 +218,7 @@ static inline void audit_log_d_path(struct audit_buffer *ab, { } static inline void audit_log_key(struct audit_buffer *ab, char *key) { } -static inline void audit_log_link_denied(const char *string) +static inline void audit_log_path_denied(int type, const char *operation) { } static inline int audit_log_task_context(struct audit_buffer *ab) { -- cgit v1.2.3 From 968a2978cb39a754750d35a47049781660682a31 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 2 Oct 2019 16:52:57 +0200 Subject: net: stmmac: Only enable enhanced addressing mode when needed Enhanced addressing mode is only required when more than 32 bits need to be addressed. Add a DMA configuration parameter to enable this mode only when needed. Signed-off-by: Thierry Reding Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dc60d03c4b60..86f9464c3f5d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -92,6 +92,7 @@ struct stmmac_dma_cfg { int fixed_burst; int mixed_burst; bool aal; + bool eame; }; #define AXI_BLEN 7 -- cgit v1.2.3 From 372a67c0c5ef63f55bd1eb480d9555328d8ec0f2 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 4 Sep 2019 14:11:20 -0700 Subject: driver core: Add fwnode_to_dev() to look up device from fwnode It's often useful to look up a device that corresponds to a fwnode. So add an API to do that irrespective of the bus on which the device has been added to. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20190904211126.47518-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ababd6bc82f3..d8c6d231d577 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -17,6 +17,7 @@ struct device; struct fwnode_handle { struct fwnode_handle *secondary; const struct fwnode_operations *ops; + struct device *dev; }; /** @@ -123,5 +124,6 @@ struct fwnode_operations { if (fwnode_has_op(fwnode, op)) \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ } while (false) +#define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) #endif -- cgit v1.2.3 From e2ae9bcc4aaacda04edb75c4eea93384719efaa5 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 4 Sep 2019 14:11:21 -0700 Subject: driver core: Add support for linking devices during device addition The firmware corresponding to a device (dev.fwnode) might be able to provide functional dependency information between a device and its supplier and consumer devices. Tracking this functional dependency allows optimizing device probe order and informing a supplier when all its consumers have probed (and thereby actively managing their resources). The existing device links feature allows tracking and using supplier-consumer relationships. So, this patch adds the add_links() fwnode callback to allow firmware to create device links for each device as the device is added. However, when consumer devices are added, they might not have a supplier device to link to despite needing mandatory resources/functionality from one or more suppliers. A waiting_for_suppliers list is created to track such consumers and retry linking them when new devices get added. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20190904211126.47518-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ include/linux/fwnode.h | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 297239a08bb7..c6fb5b3431b7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1135,11 +1135,13 @@ enum dl_dev_state { * struct dev_links_info - Device data related to device links. * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. + * @needs_suppliers: Hook to global list of devices waiting for suppliers. * @status: Driver status information. */ struct dev_links_info { struct list_head suppliers; struct list_head consumers; + struct list_head needs_suppliers; enum dl_dev_state status; }; diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index d8c6d231d577..6ae05b9ce359 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -66,6 +66,21 @@ struct fwnode_reference_args { * endpoint node. * @graph_get_port_parent: Return the parent node of a port node. * @graph_parse_endpoint: Parse endpoint for port and endpoint id. + * @add_links: Called after the device corresponding to the fwnode is added + * using device_add(). The function is expected to create device + * links to all the suppliers of the device that are available at + * the time this function is called. The function must NOT stop + * at the first failed device link if other unlinked supplier + * devices are present in the system. If some suppliers are not + * yet available, this function will be called again when other + * devices are added to allow creating device links to any newly + * available suppliers. + * + * Return 0 if device links have been successfully created to all + * the suppliers of this device or if the supplier information is + * not known. Return an error if and only if the supplier + * information is known but some of the suppliers are not yet + * available to create device links to. */ struct fwnode_operations { struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); @@ -103,6 +118,8 @@ struct fwnode_operations { (*graph_get_port_parent)(struct fwnode_handle *fwnode); int (*graph_parse_endpoint)(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); + int (*add_links)(const struct fwnode_handle *fwnode, + struct device *dev); }; #define fwnode_has_op(fwnode, op) \ -- cgit v1.2.3 From fc5a251d0fd7ca9038bab78a8c97932c8c6ca23b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 4 Sep 2019 14:11:23 -0700 Subject: driver core: Add sync_state driver/bus callback This sync_state driver/bus callback is called once all the consumers of a supplier have probed successfully. This allows the supplier device's driver/bus to sync the supplier device's state to the software state with the guarantee that all the consumers are actively managing the resources provided by the supplier device. To maintain backwards compatibility and ease transition from existing frameworks and resource cleanup schemes, late_initcall_sync is the earliest when the sync_state callback might be called. There is no upper bound on the time by which the sync_state callback has to be called. This is because if a consumer device never probes, the supplier has to maintain its resources in the state left by the bootloader. For example, if the bootloader leaves the display backlight at a fixed voltage and the backlight driver is never probed, you don't want the backlight to ever be turned off after boot up. Also, when multiple devices are added after kernel init, some suppliers could be added before their consumer devices get added. In these instances, the supplier devices could get their sync_state callback called right after they probe because the consumers devices haven't had a chance to create device links to the suppliers. To handle this correctly, this change also provides APIs to pause/resume sync state callbacks so that when multiple devices are added, their sync_state callback evaluation can be postponed to happen after all of them are added. kbuild test robot reported missing documentation for device.state_synced Reported-by: kbuild test robot Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20190904211126.47518-5-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c6fb5b3431b7..6978bb471567 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -80,6 +80,13 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * that generate uevents to add the environment variables. * @probe: Called when a new device or driver add to this bus, and callback * the specific driver's probe to initial the matched device. + * @sync_state: Called to sync device state to software state after all the + * state tracking consumers linked to this device (present at + * the time of late_initcall) have successfully bound to a + * driver. If the device has no consumers, this function will + * be called at late_initcall_sync level. If the device has + * consumers that are never bound to a driver, this function + * will never get called until they do. * @remove: Called when a device removed from this bus. * @shutdown: Called at shut-down time to quiesce the device. * @@ -123,6 +130,7 @@ struct bus_type { int (*match)(struct device *dev, struct device_driver *drv); int (*uevent)(struct device *dev, struct kobj_uevent_env *env); int (*probe)(struct device *dev); + void (*sync_state)(struct device *dev); int (*remove)(struct device *dev); void (*shutdown)(struct device *dev); @@ -340,6 +348,13 @@ enum probe_type { * @probe: Called to query the existence of a specific device, * whether this driver can work with it, and bind the driver * to a specific device. + * @sync_state: Called to sync device state to software state after all the + * state tracking consumers linked to this device (present at + * the time of late_initcall) have successfully bound to a + * driver. If the device has no consumers, this function will + * be called at late_initcall_sync level. If the device has + * consumers that are never bound to a driver, this function + * will never get called until they do. * @remove: Called when the device is removed from the system to * unbind a device from this driver. * @shutdown: Called at shut-down time to quiesce the device. @@ -379,6 +394,7 @@ struct device_driver { const struct acpi_device_id *acpi_match_table; int (*probe) (struct device *dev); + void (*sync_state)(struct device *dev); int (*remove) (struct device *dev); void (*shutdown) (struct device *dev); int (*suspend) (struct device *dev, pm_message_t state); @@ -1136,12 +1152,14 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. + * @defer_sync: Hook to global list of devices that have deferred sync_state. * @status: Driver status information. */ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head needs_suppliers; + struct list_head defer_sync; enum dl_dev_state status; }; @@ -1217,6 +1235,9 @@ struct dev_links_info { * @offline: Set after successful invocation of bus type's .offline(). * @of_node_reused: Set if the device-tree node is shared with an ancestor * device. + * @state_synced: The hardware state of this device has been synced to match + * the software state of this device by calling the driver/bus + * sync_state() callback. * @dma_coherent: this particular device is dma coherent, even if the * architecture supports non-coherent devices. * @@ -1313,6 +1334,7 @@ struct device { bool offline_disabled:1; bool offline:1; bool of_node_reused:1; + bool state_synced:1; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) @@ -1655,6 +1677,8 @@ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags); void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); +void device_links_supplier_sync_state_pause(void); +void device_links_supplier_sync_state_resume(void); #ifndef dev_fmt #define dev_fmt(fmt) fmt -- cgit v1.2.3 From 7c550daffe22a97282effa75fe7c1f6b83563ecb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Oct 2019 11:49:27 +0200 Subject: net: fib_notifier: make FIB notifier per-netns Currently all users of FIB notifier only cares about events in init_net. Later in this patchset, users get interested in other namespaces too. However, for every registered block user is interested only about one namespace. Make the FIB notifier registration per-netns and avoid unnecessary calls of notifier block for other namespaces. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 34de06b426ef..0931631bbc13 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -47,7 +47,6 @@ struct vif_entry_notifier_info { }; static inline int mr_call_vif_notifier(struct notifier_block *nb, - struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, @@ -56,7 +55,6 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, struct vif_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .dev = vif->dev, .vif_index = vif_index, @@ -64,7 +62,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .tb_id = tb_id, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_vif_notifiers(struct net *net, @@ -77,7 +75,6 @@ static inline int mr_call_vif_notifiers(struct net *net, struct vif_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .dev = vif->dev, .vif_index = vif_index, @@ -173,7 +170,6 @@ struct mfc_entry_notifier_info { }; static inline int mr_call_mfc_notifier(struct notifier_block *nb, - struct net *net, unsigned short family, enum fib_event_type event_type, struct mr_mfc *mfc, u32 tb_id) @@ -181,13 +177,12 @@ static inline int mr_call_mfc_notifier(struct notifier_block *nb, struct mfc_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .mfc = mfc, .tb_id = tb_id }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib_notifier(nb, event_type, &info.info); } static inline int mr_call_mfc_notifiers(struct net *net, @@ -199,7 +194,6 @@ static inline int mr_call_mfc_notifiers(struct net *net, struct mfc_entry_notifier_info info = { .info = { .family = family, - .net = net, }, .mfc = mfc, .tb_id = tb_id -- cgit v1.2.3 From b7a595577ef3dc9add2b3e6d00869d017306bfbe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Oct 2019 11:49:30 +0200 Subject: net: fib_notifier: propagate extack down to the notifier block callback Since errors are propagated all the way up to the caller, propagate possible extack of the caller all the way down to the notifier block callback. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0931631bbc13..8071148f29a6 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -50,11 +50,13 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, - unsigned short vif_index, u32 tb_id) + unsigned short vif_index, u32 tb_id, + struct netlink_ext_ack *extack) { struct vif_entry_notifier_info info = { .info = { .family = family, + .extack = extack, }, .dev = vif->dev, .vif_index = vif_index, @@ -172,11 +174,13 @@ struct mfc_entry_notifier_info { static inline int mr_call_mfc_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, - struct mr_mfc *mfc, u32 tb_id) + struct mr_mfc *mfc, u32 tb_id, + struct netlink_ext_ack *extack) { struct mfc_entry_notifier_info info = { .info = { .family = family, + .extack = extack, }, .mfc = mfc, .tb_id = tb_id @@ -295,10 +299,11 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock); + rwlock_t *mrt_lock, struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -349,10 +354,11 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, static inline int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, - struct notifier_block *nb), + struct notifier_block *nb, + struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock) + rwlock_t *mrt_lock, struct netlink_ext_ack *extack) { return -EINVAL; } -- cgit v1.2.3 From 894616f79200f74af6a0426fa20d986b1f45cd9b Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 2 Oct 2019 10:20:19 -0700 Subject: Input: add input_get_poll_interval() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some drivers need to be able to know the current polling interval for devices working in polling mode, let's allow them fetching it. Acked-By: Benjamin Tissoires Tested-by: Michal Vokáč Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 94f277cd806a..56f2fd32e609 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -383,6 +383,7 @@ int input_setup_polling(struct input_dev *dev, void input_set_poll_interval(struct input_dev *dev, unsigned int interval); void input_set_min_poll_interval(struct input_dev *dev, unsigned int interval); void input_set_max_poll_interval(struct input_dev *dev, unsigned int interval); +int input_get_poll_interval(struct input_dev *dev); int __must_check input_register_handler(struct input_handler *); void input_unregister_handler(struct input_handler *); -- cgit v1.2.3 From 01b4c39901e087ceebae2733857248de81476bd8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 24 Jul 2019 15:22:59 +0200 Subject: nohz: Add TICK_DEP_BIT_RCU If a nohz_full CPU is looping in the kernel, the scheduling-clock tick might nevertheless remain disabled. In !PREEMPT kernels, this can prevent RCU's attempts to enlist the aid of that CPU's executions of cond_resched(), which can in turn result in an arbitrarily delayed grace period and thus an OOM. RCU therefore needs a way to enable a holdout nohz_full CPU's scheduler-clock interrupt. This commit therefore provides a new TICK_DEP_BIT_RCU value which RCU can pass to tick_dep_set_cpu() and friends to force on the scheduler-clock interrupt for a specified CPU or task. In some cases, rcutorture needs to turn on the scheduler-clock tick, so this commit also exports the relevant symbols to GPL-licensed modules. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/tick.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f92a10b5e112..39eb44564058 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -108,7 +108,8 @@ enum tick_dep_bits { TICK_DEP_BIT_POSIX_TIMER = 0, TICK_DEP_BIT_PERF_EVENTS = 1, TICK_DEP_BIT_SCHED = 2, - TICK_DEP_BIT_CLOCK_UNSTABLE = 3 + TICK_DEP_BIT_CLOCK_UNSTABLE = 3, + TICK_DEP_BIT_RCU = 4 }; #define TICK_DEP_MASK_NONE 0 @@ -116,6 +117,7 @@ enum tick_dep_bits { #define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS) #define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED) #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) +#define TICK_DEP_MASK_RCU (1 << TICK_DEP_BIT_RCU) #ifdef CONFIG_NO_HZ_COMMON extern bool tick_nohz_enabled; @@ -268,6 +270,9 @@ static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } +static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } +static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } + static inline void tick_dep_set(enum tick_dep_bits bit) { } static inline void tick_dep_clear(enum tick_dep_bits bit) { } static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } -- cgit v1.2.3 From 366237e7b0833faa2d8da7a8d7d7da8c3ca802e5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 10 Jul 2019 08:01:01 -0700 Subject: stop_machine: Provide RCU quiescent state in multi_cpu_stop() When multi_cpu_stop() loops waiting for other tasks, it can trigger an RCU CPU stall warning. This can be misleading because what is instead needed is information on whatever task is blocking multi_cpu_stop(). This commit therefore inserts an RCU quiescent state into the multi_cpu_stop() function's waitloop. Signed-off-by: Paul E. McKenney --- include/linux/rcutree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 18b1ed9864b0..c5147de885ec 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,6 +37,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier(void); bool rcu_eqs_special_set(int cpu); +void rcu_momentary_dyntick_idle(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); -- cgit v1.2.3 From 79ba7ff5a9925f5c170f51ed7a96d1475eb6c27f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 4 Aug 2019 13:17:35 -0700 Subject: rcutorture: Emulate dyntick aspect of userspace nohz_full sojourn During an actual call_rcu() flood, there would be frequent trips to userspace (in-kernel call_rcu() floods must be otherwise housebroken). Userspace execution on nohz_full CPUs implies an RCU dyntick idle/not-idle transition pair, so this commit adds emulation of that pair. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9bf1dfe7781f..37b6f0c2b79d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -84,6 +84,7 @@ static inline void rcu_scheduler_starting(void) { } #endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_is_watching(void) { return true; } +static inline void rcu_momentary_dyntick_idle(void) { } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } -- cgit v1.2.3 From 507fd01d5333338753a1cc26322dfc9f856c109f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 3 Oct 2019 11:29:12 +0200 Subject: drivers: move the early platform device support to arch/sh SuperH is the only user of the current implementation of early platform device support. We want to introduce a more robust approach to early probing. As the first step - move all the current early platform code to arch/sh. In order not to export internal drivers/base functions to arch code for this temporary solution - copy the two needed routines for driver matching from drivers/base/platform.c to arch/sh/drivers/platform_early.c. Also: call early_platform_cleanup() from subsys_initcall() so that it's called after all early devices are probed. Signed-off-by: Bartosz Golaszewski Cc: Rich Felker Link: https://lore.kernel.org/r/20191003092913.10731-2-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 64 ++++++++--------------------------------- 1 file changed, 12 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 1b5cec067533..85aa28a041a9 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -292,58 +292,6 @@ void platform_unregister_drivers(struct platform_driver * const *drivers, #define platform_register_drivers(drivers, count) \ __platform_register_drivers(drivers, count, THIS_MODULE) -/* early platform driver interface */ -struct early_platform_driver { - const char *class_str; - struct platform_driver *pdrv; - struct list_head list; - int requested_id; - char *buffer; - int bufsize; -}; - -#define EARLY_PLATFORM_ID_UNSET -2 -#define EARLY_PLATFORM_ID_ERROR -3 - -extern int early_platform_driver_register(struct early_platform_driver *epdrv, - char *buf); -extern void early_platform_add_devices(struct platform_device **devs, int num); - -static inline int is_early_platform_device(struct platform_device *pdev) -{ - return !pdev->dev.driver; -} - -extern void early_platform_driver_register_all(char *class_str); -extern int early_platform_driver_probe(char *class_str, - int nr_probe, int user_only); -extern void early_platform_cleanup(void); - -#define early_platform_init(class_string, platdrv) \ - early_platform_init_buffer(class_string, platdrv, NULL, 0) - -#ifndef MODULE -#define early_platform_init_buffer(class_string, platdrv, buf, bufsiz) \ -static __initdata struct early_platform_driver early_driver = { \ - .class_str = class_string, \ - .buffer = buf, \ - .bufsize = bufsiz, \ - .pdrv = platdrv, \ - .requested_id = EARLY_PLATFORM_ID_UNSET, \ -}; \ -static int __init early_platform_driver_setup_func(char *buffer) \ -{ \ - return early_platform_driver_register(&early_driver, buffer); \ -} \ -early_param(class_string, early_platform_driver_setup_func) -#else /* MODULE */ -#define early_platform_init_buffer(class_string, platdrv, buf, bufsiz) \ -static inline char *early_platform_driver_setup_func(void) \ -{ \ - return bufsiz ? buf : NULL; \ -} -#endif /* MODULE */ - #ifdef CONFIG_SUSPEND extern int platform_pm_suspend(struct device *dev); extern int platform_pm_resume(struct device *dev); @@ -378,4 +326,16 @@ extern int platform_dma_configure(struct device *dev); #define USE_PLATFORM_PM_SLEEP_OPS #endif +#ifndef CONFIG_SUPERH +/* + * REVISIT: This stub is needed for all non-SuperH users of early platform + * drivers. It should go away once we introduce the new platform_device-based + * early driver framework. + */ +static inline int is_early_platform_device(struct platform_device *pdev) +{ + return 0; +} +#endif /* CONFIG_SUPERH */ + #endif /* _PLATFORM_DEVICE_H_ */ -- cgit v1.2.3 From 201e91091b1d47047f55580b5474e1239f4d17aa Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 3 Oct 2019 11:29:13 +0200 Subject: sh: add the sh_ prefix to early platform symbols Old early platform device support is now sh-specific. Before moving on to implementing new early platform framework based on real platform devices, prefix all early platform symbols with 'sh_'. Signed-off-by: Bartosz Golaszewski Cc: Rich Felker Link: https://lore.kernel.org/r/20191003092913.10731-3-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 85aa28a041a9..f086b6a1bc6b 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -332,7 +332,7 @@ extern int platform_dma_configure(struct device *dev); * drivers. It should go away once we introduce the new platform_device-based * early driver framework. */ -static inline int is_early_platform_device(struct platform_device *pdev) +static inline int is_sh_early_platform_device(struct platform_device *pdev) { return 0; } -- cgit v1.2.3 From b9df4fd7e99cb8bfd80c4143f3045d63b1754ad0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 6 Oct 2019 18:19:54 +0200 Subject: net: core: change return type of pskb_may_pull to bool This function de-facto returns a bool, so let's change the return type accordingly. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4351577b14d7..0a58402a166e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2261,12 +2261,12 @@ static inline void *pskb_pull(struct sk_buff *skb, unsigned int len) return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); } -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len) { if (likely(len <= skb_headlen(skb))) - return 1; + return true; if (unlikely(len > skb->len)) - return 0; + return false; return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL; } -- cgit v1.2.3 From f00eaa38eb0c7185ffff51c5288d00af9032e354 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 7 Oct 2019 15:47:15 +0200 Subject: rtc: add a timestamp for year 0 A few RTCs handle dates from year 0 to year 9999. Add a timestamp even if years before 1970 will probably never be used. Link: https://lore.kernel.org/r/20191007134724.15505-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index df666cf29ef1..2680f9b2b119 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -160,6 +160,7 @@ struct rtc_device { #define to_rtc_device(d) container_of(d, struct rtc_device, dev) /* useful timestamps */ +#define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */ -- cgit v1.2.3 From 163ab96b52ae2bb2d8f188cd29f0b570610f9007 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 6 Oct 2019 21:09:27 -0700 Subject: net: sockmap: use bitmap for copy info Don't use bool array in struct sk_msg_sg, save 12 bytes. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- include/linux/skmsg.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e4b3fb4bb77c..fe80d537945d 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -28,13 +28,14 @@ struct sk_msg_sg { u32 end; u32 size; u32 copybreak; - bool copy[MAX_MSG_FRAGS]; + unsigned long copy; /* The extra element is used for chaining the front and sections when * the list becomes partitioned (e.g. end < start). The crypto APIs * require the chaining. */ struct scatterlist data[MAX_MSG_FRAGS + 1]; }; +static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. */ struct sk_msg { @@ -227,7 +228,7 @@ static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); - if (msg->sg.copy[msg->sg.start]) { + if (test_bit(msg->sg.start, &msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { @@ -246,7 +247,7 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, sg_set_page(sge, page, len, offset); sg_unmark_end(sge); - msg->sg.copy[msg->sg.end] = true; + __set_bit(msg->sg.end, &msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } @@ -254,7 +255,10 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { - msg->sg.copy[i] = copy_state; + if (copy_state) + __set_bit(i, &msg->sg.copy); + else + __clear_bit(i, &msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; -- cgit v1.2.3 From 9566256518de0520c964bdf23140eac324b136af Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:00:42 -0700 Subject: block: Remove request_queue.nr_queues Commit 897bb0c7f1ea ("blk-mq: Use proper cpumask iterator"; v4.6) removed the last use of request_queue.nr_queues from outside blk_mq_init_allocate_queue(). Remove this member variable to make struct request_queue smaller. This patch does not change any functionality. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3ea78b0c91c..d4051acb92a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -411,7 +411,6 @@ struct request_queue { /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; - unsigned int nr_queues; unsigned int queue_depth; -- cgit v1.2.3 From 7a18312c739aeace7c8ea448d39a0313d5ad5d5d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Sep 2019 16:00:45 -0700 Subject: block: Document all members of blk_mq_tag_set and bkl_mq_queue_map The meaning of several member variables of these two data structures is nontrivial. Hence document all member variables using the kernel-doc syntax. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 54 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0bf056de5cc3..a96b5cc957ab 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -76,6 +76,16 @@ struct blk_mq_hw_ctx { struct srcu_struct srcu[0]; }; +/** + * struct blk_mq_queue_map - ctx -> hctx mapping + * @mq_map: CPU ID to hardware queue index map. This is an array + * with nr_cpu_ids elements. Each element has a value in the range + * [@queue_offset, @queue_offset + @nr_queues). + * @nr_queues: Number of hardware queues to map CPU IDs onto. + * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe + * driver to map each hardware queue type (enum hctx_type) onto a distinct + * set of hardware queues. + */ struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; @@ -90,23 +100,45 @@ enum hctx_type { HCTX_MAX_TYPES, }; +/** + * struct blk_mq_tag_set - tag set that can be shared between request queues + * @map: One or more ctx -> hctx mappings. One map exists for each + * hardware queue type (enum hctx_type) that the driver wishes + * to support. There are no restrictions on maps being of the + * same size, and it's perfectly legal to share maps between + * types. + * @nr_maps: Number of elements in the @map array. A number in the range + * [1, HCTX_MAX_TYPES]. + * @ops: Pointers to functions that implement block driver behavior. + * @nr_hw_queues: Number of hardware queues supported by the block driver that + * owns this data structure. + * @queue_depth: Number of tags per hardware queue, reserved tags included. + * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag + * allocations. + * @cmd_size: Number of additional bytes to allocate per request. The block + * driver owns these additional bytes. + * @numa_node: NUMA node the storage adapter has been connected to. + * @timeout: Request processing timeout in jiffies. + * @flags: Zero or more BLK_MQ_F_* flags. + * @driver_data: Pointer to data owned by the block driver that created this + * tag set. + * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues + * elements. + * @tag_list_lock: Serializes tag_list accesses. + * @tag_list: List of the request queues that use this tag set. See also + * request_queue.tag_set_list. + */ struct blk_mq_tag_set { - /* - * map[] holds ctx -> hctx mappings, one map exists for each type - * that the driver wishes to support. There are no restrictions - * on maps being of the same size, and it's perfectly legal to - * share maps between types. - */ struct blk_mq_queue_map map[HCTX_MAX_TYPES]; - unsigned int nr_maps; /* nr entries in map[] */ + unsigned int nr_maps; const struct blk_mq_ops *ops; - unsigned int nr_hw_queues; /* nr hw queues across maps */ - unsigned int queue_depth; /* max hw supported */ + unsigned int nr_hw_queues; + unsigned int queue_depth; unsigned int reserved_tags; - unsigned int cmd_size; /* per-request extra data */ + unsigned int cmd_size; int numa_node; unsigned int timeout; - unsigned int flags; /* BLK_MQ_F_* */ + unsigned int flags; void *driver_data; struct blk_mq_tags **tags; -- cgit v1.2.3 From 27a46989a82c71028f2ba15a3f2c8f30451fda33 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Sep 2019 11:25:49 +0300 Subject: blk-mq: Inline status checkers blk_mq_request_completed() and blk_mq_request_started() are short, inline it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a96b5cc957ab..e0fce93ac127 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -333,9 +333,25 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } +/** + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request + * @rq: target request. + */ +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) +{ + return READ_ONCE(rq->state); +} + +static inline int blk_mq_request_started(struct request *rq) +{ + return blk_mq_rq_state(rq) != MQ_RQ_IDLE; +} + +static inline int blk_mq_request_completed(struct request *rq) +{ + return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; +} -int blk_mq_request_started(struct request *rq); -int blk_mq_request_completed(struct request *rq); void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); -- cgit v1.2.3 From b0a1614fb1f58520938968ebe1f4f11bcf34839e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 23 Aug 2019 05:16:33 -0700 Subject: firmware: qcom: scm: add OCMEM lock/unlock interface Add support for the OCMEM lock/unlock interface that is needed by the On Chip MEMory (OCMEM) that is present on some Snapdragon devices. Signed-off-by: Rob Clark [masneyb@onstation.org: ported to latest kernel; minor reformatting.] Signed-off-by: Brian Masney Reviewed-by: Bjorn Andersson Tested-by: Gabriel Francisco Signed-off-by: Rob Clark --- include/linux/qcom_scm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 2d5eff506e13..b49b734d662c 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -24,6 +24,16 @@ struct qcom_scm_vmperm { int perm; }; +enum qcom_scm_ocmem_client { + QCOM_SCM_OCMEM_UNUSED_ID = 0x0, + QCOM_SCM_OCMEM_GRAPHICS_ID, + QCOM_SCM_OCMEM_VIDEO_ID, + QCOM_SCM_OCMEM_LP_AUDIO_ID, + QCOM_SCM_OCMEM_SENSORS_ID, + QCOM_SCM_OCMEM_OTHER_OS_ID, + QCOM_SCM_OCMEM_DEBUG_ID, +}; + #define QCOM_SCM_VMID_HLOS 0x3 #define QCOM_SCM_VMID_MSS_MSA 0xF #define QCOM_SCM_VMID_WLAN 0x18 @@ -41,6 +51,11 @@ extern bool qcom_scm_is_available(void); extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); +extern bool qcom_scm_ocmem_lock_available(void); +extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size, u32 mode); +extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size); extern bool qcom_scm_pas_supported(u32 peripheral); extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size); -- cgit v1.2.3 From 0434a4061471a9afc2b2061add496e58ba4bb92d Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 23 Aug 2019 05:16:34 -0700 Subject: firmware: qcom: scm: add support to restore secure config to qcm_scm-32 Add support to restore the secure configuration for qcm_scm-32.c. This is needed by the On Chip MEMory (OCMEM) that is present on some Snapdragon devices. Signed-off-by: Rob Clark [masneyb@onstation.org: ported to latest kernel; set ctx_bank_num to spare parameter.] Signed-off-by: Brian Masney Reviewed-by: Bjorn Andersson Tested-by: Gabriel Francisco Signed-off-by: Rob Clark --- include/linux/qcom_scm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index b49b734d662c..04382e1798e4 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -34,6 +34,16 @@ enum qcom_scm_ocmem_client { QCOM_SCM_OCMEM_DEBUG_ID, }; +enum qcom_scm_sec_dev_id { + QCOM_SCM_MDSS_DEV_ID = 1, + QCOM_SCM_OCMEM_DEV_ID = 5, + QCOM_SCM_PCIE0_DEV_ID = 11, + QCOM_SCM_PCIE1_DEV_ID = 12, + QCOM_SCM_GFX_DEV_ID = 18, + QCOM_SCM_UFS_DEV_ID = 19, + QCOM_SCM_ICE_DEV_ID = 20, +}; + #define QCOM_SCM_VMID_HLOS 0x3 #define QCOM_SCM_VMID_MSS_MSA 0xF #define QCOM_SCM_VMID_WLAN 0x18 @@ -70,6 +80,7 @@ extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); extern int qcom_scm_set_remote_state(u32 state, u32 id); +extern bool qcom_scm_restore_sec_cfg_available(void); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -- cgit v1.2.3 From 8c2a2b8c2ff680b1d0f715cf1f320722b762f9dd Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 3 Oct 2019 11:52:29 +0200 Subject: nvmem: core: add nvmem_device_find nvmem_device_find provides a way to search for nvmem devices with the help of a match function simlair to bus_find_device. Reviewed-by: Srinivas Kandagatla Acked-by: Srinivas Kandagatla Signed-off-by: Thomas Bogendoerfer Signed-off-by: Paul Burton Cc: Jonathan Corbet Cc: Ralf Baechle Cc: James Hogan Cc: Lee Jones Cc: David S. Miller Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-rtc@vger.kernel.org Cc: linux-serial@vger.kernel.org --- include/linux/nvmem-consumer.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 8f8be5b00060..02dc4aa992b2 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -89,6 +89,9 @@ void nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, int nvmem_register_notifier(struct notifier_block *nb); int nvmem_unregister_notifier(struct notifier_block *nb); +struct nvmem_device *nvmem_device_find(void *data, + int (*match)(struct device *dev, const void *data)); + #else static inline struct nvmem_cell *nvmem_cell_get(struct device *dev, @@ -204,6 +207,12 @@ static inline int nvmem_unregister_notifier(struct notifier_block *nb) return -EOPNOTSUPP; } +static inline struct nvmem_device *nvmem_device_find(void *data, + int (*match)(struct device *dev, const void *data)) +{ + return NULL; +} + #endif /* CONFIG_NVMEM */ #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) -- cgit v1.2.3 From b42faeee718ce13ef6eb99c24880b58deb54c8fa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Sep 2019 04:01:12 +0300 Subject: spi: Add a PTP system timestamp to the transfer structure SPI is one of the interfaces used to access devices which have a POSIX clock driver (real time clocks, 1588 timers etc). The fact that the SPI bus is slow is not what the main problem is, but rather the fact that drivers don't take a constant amount of time in transferring data over SPI. When there is a high delay in the readout of time, there will be uncertainty in the value that has been read out of the peripheral. When that delay is constant, the uncertainty can at least be approximated with a certain accuracy which is fine more often than not. Timing jitter occurs all over in the kernel code, and is mainly caused by having to let go of the CPU for various reasons such as preemption, servicing interrupts, going to sleep, etc. Another major reason is CPU dynamic frequency scaling. It turns out that the problem of retrieving time from a SPI peripheral with high accuracy can be solved by the use of "PTP system timestamping" - a mechanism to correlate the time when the device has snapshotted its internal time counter with the Linux system time at that same moment. This is sufficient for having a precise time measurement - it is not necessary for the whole SPI transfer to be transmitted "as fast as possible", or "as low-jitter as possible". The system has to be low-jitter for a very short amount of time to be effective. This patch introduces a PTP system timestamping mechanism in struct spi_transfer. This is to be used by SPI device drivers when they need to know the exact time at which the underlying device's time was snapshotted. More often than not, SPI peripherals have a very exact timing for when their SPI-to-interconnect bridge issues a transaction for snapshotting and reading the time register, and that will be dependent on when the SPI-to-interconnect bridge figures out that this is what it should do, aka as soon as it sees byte N of the SPI transfer. Since spi_device drivers are the ones who'd know best how the peripheral behaves in this regard, expose a mechanism in spi_transfer which allows them to specify which word (or word range) from the transfer should be timestamped. Add a default implementation of the PTP system timestamping in the SPI core. This is not going to be satisfactory performance-wise, but should at least increase the likelihood that SPI device drivers will use PTP system timestamping in the future. There are 3 entry points from the core towards the SPI controller drivers: - transfer_one: The driver is passed individual spi_transfers to execute. This is the easiest to timestamp. - transfer_one_message: The core passes the driver an entire spi_message (a potential batch of spi_transfers). The core puts the same pre and post timestamp to all transfers within a message. This is not ideal, but nothing better can be done by default anyway, since the core has no insight into how the driver batches the transfers. - transfer: Like transfer_one_message, but for unqueued drivers (i.e. the driver implements its own queue scheduling). Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20190905010114.26718-3-olteanv@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af4f265d0f67..27f6b046cf92 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -13,6 +13,7 @@ #include #include #include +#include struct dma_chan; struct property_entry; @@ -409,6 +410,12 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. + * @ptp_sts_supported: If the driver sets this to true, it must provide a + * time snapshot in @spi_transfer->ptp_sts as close as possible to the + * moment in time when @spi_transfer->ptp_sts_word_pre and + * @spi_transfer->ptp_sts_word_post were transmitted. + * If the driver does not set this, the SPI core takes the snapshot as + * close to the driver hand-over as possible. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -604,6 +611,15 @@ struct spi_controller { void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); + + /* + * Driver sets this field to indicate it is able to snapshot SPI + * transfers (needed e.g. for reading the time of POSIX clocks) + */ + bool ptp_sts_supported; + + /* Interrupt enable state during PTP system timestamping */ + unsigned long irq_flags; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -644,6 +660,14 @@ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ct extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); +/* Helper calls for driver to timestamp transfer */ +void spi_take_timestamp_pre(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); +void spi_take_timestamp_post(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); + /* the spi driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -753,6 +777,35 @@ extern void spi_res_release(struct spi_controller *ctlr, * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use + * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset + * within @tx_buf for which the SPI device is requesting that the time + * snapshot for this transfer begins. Upon completing the SPI transfer, + * this value may have changed compared to what was requested, depending + * on the available snapshotting resolution (DMA transfer, + * @ptp_sts_supported is false, etc). + * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning + * that a single byte should be snapshotted). + * If the core takes care of the timestamp (if @ptp_sts_supported is false + * for this controller), it will set @ptp_sts_word_pre to 0, and + * @ptp_sts_word_post to the length of the transfer. This is done + * purposefully (instead of setting to spi_transfer->len - 1) to denote + * that a transfer-level snapshot taken from within the driver may still + * be of higher quality. + * @ptp_sts: Pointer to a memory location held by the SPI slave device where a + * PTP system timestamp structure may lie. If drivers use PIO or their + * hardware has some sort of assist for retrieving exact transfer timing, + * they can (and should) assert @ptp_sts_supported and populate this + * structure using the ptp_read_system_*ts helper functions. + * The timestamp must represent the time at which the SPI slave device has + * processed the word, i.e. the "pre" timestamp should be taken before + * transmitting the "pre" word, and the "post" timestamp after receiving + * transmit confirmation from the controller for the "post" word. + * @timestamped_pre: Set by the SPI controller driver to denote it has acted + * upon the @ptp_sts request. Not set when the SPI core has taken care of + * the task. SPI device drivers are free to print a warning if this comes + * back unset and they need the better resolution. + * @timestamped_post: See above. The reason why both exist is that these + * booleans are also used to keep state in the core SPI logic. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. @@ -842,6 +895,14 @@ struct spi_transfer { u32 effective_speed_hz; + unsigned int ptp_sts_word_pre; + unsigned int ptp_sts_word_post; + + struct ptp_system_timestamp *ptp_sts; + + bool timestamped_pre; + bool timestamped_post; + struct list_head transfer_list; }; -- cgit v1.2.3 From 4b7d4d453fc46769394e31d1cb19088f49897b59 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Thu, 16 May 2019 11:37:16 -0400 Subject: device_cgroup: Export devcgroup_check_permission For AMD compute (amdkfd) driver. All AMD compute devices are exported via single device node /dev/kfd. As a result devices cannot be controlled individually using device cgroup. AMD compute devices will rely on its graphics counterpart that exposes /dev/dri/renderN node for each device. For each task (based on its cgroup), KFD driver will check if /dev/dri/renderN node is accessible before exposing it. Signed-off-by: Harish Kasiviswanathan Acked-by: Tejun Heo Acked-by: Felix Kuehling Reviewed-by: Roman Gushchin Signed-off-by: Alex Deucher --- include/linux/device_cgroup.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 8557efe096dc..fa35b52e0002 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -12,26 +12,15 @@ #define DEVCG_DEV_ALL 4 /* this represents all devices */ #ifdef CONFIG_CGROUP_DEVICE -extern int __devcgroup_check_permission(short type, u32 major, u32 minor, - short access); +int devcgroup_check_permission(short type, u32 major, u32 minor, + short access); #else -static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, - short access) +static inline int devcgroup_check_permission(short type, u32 major, u32 minor, + short access) { return 0; } #endif #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) -static inline int devcgroup_check_permission(short type, u32 major, u32 minor, - short access) -{ - int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); - - if (rc) - return -EPERM; - - return __devcgroup_check_permission(type, major, minor, access); -} - static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; -- cgit v1.2.3 From 2c3d0c9ffd24d9b4c62c5dfb2104695a614be28c Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 19 Sep 2019 22:36:08 +0800 Subject: iio: core: Add optional symbolic label to device attributes If a label is defined in the device tree for this device add that to the device specific attributes. This is useful for userspace to be able to identify an individual device when multiple identical chips are present in the system. Tested-by: Michal Simek Signed-off-by: Phil Reid Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 8e132cf819e4..862ce0019eba 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -510,6 +510,7 @@ struct iio_buffer_setup_ops { * attributes * @chan_attr_group: [INTERN] group for all attrs in base directory * @name: [DRIVER] name of the device. + * @label: [DRIVER] unique name to identify which device this is * @info: [DRIVER] callbacks and constant info from driver * @clock_id: [INTERN] timestamping clock posix identifier * @info_exist_lock: [INTERN] lock to prevent use during removal @@ -553,6 +554,7 @@ struct iio_dev { struct list_head channel_attr_list; struct attribute_group chan_attr_group; const char *name; + const char *label; const struct iio_info *info; clockid_t clock_id; struct mutex info_exist_lock; -- cgit v1.2.3 From 017f77c050a3bc1f1ff877d1f265beeee26d7dea Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 3 Oct 2019 20:56:01 +0100 Subject: netfilter: ipset: add a coding-style fix to ip_set_ext_destroy. Use a local variable to hold comment in order to align the arguments of ip_set_comment_free properly. Signed-off-by: Jeremy Sowden Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 9bc255a8461b..9fee4837d02c 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -269,9 +269,11 @@ ip_set_ext_destroy(struct ip_set *set, void *data) /* Check that the extension is enabled for the set and * call it's destroy function for its extension part in data. */ - if (SET_WITH_COMMENT(set)) - ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( - set, ext_comment(data, set)); + if (SET_WITH_COMMENT(set)) { + struct ip_set_comment *c = ext_comment(data, set); + + ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(set, c); + } } static inline int -- cgit v1.2.3 From 94177f6e11c74b6ca3bcf7f65d3d74f00bbd6a8c Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 3 Oct 2019 20:56:03 +0100 Subject: netfilter: ipset: move ip_set_comment functions from ip_set.h to ip_set_core.c. Most of the functions are only called from within ip_set_core.c. The exception is ip_set_init_comment. However, this is too complex to be a good candidate for a static inline function. Move it to ip_set_core.c, change its linkage to extern and export it, leaving a declaration in ip_set.h. ip_set_comment_free is only used as an extension destructor, so change its prototype to match and drop cast. Signed-off-by: Jeremy Sowden Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 63 ++-------------------------------- 1 file changed, 2 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 9fee4837d02c..985c9bb1ab65 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -521,67 +521,8 @@ ip_set_timeout_get(const unsigned long *timeout) return t == 0 ? 1 : t; } -static inline char* -ip_set_comment_uget(struct nlattr *tb) -{ - return nla_data(tb); -} - -/* Called from uadd only, protected by the set spinlock. - * The kadt functions don't use the comment extensions in any way. - */ -static inline void -ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, - const struct ip_set_ext *ext) -{ - struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); - size_t len = ext->comment ? strlen(ext->comment) : 0; - - if (unlikely(c)) { - set->ext_size -= sizeof(*c) + strlen(c->str) + 1; - kfree_rcu(c, rcu); - rcu_assign_pointer(comment->c, NULL); - } - if (!len) - return; - if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) - len = IPSET_MAX_COMMENT_SIZE; - c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); - if (unlikely(!c)) - return; - strlcpy(c->str, ext->comment, len + 1); - set->ext_size += sizeof(*c) + strlen(c->str) + 1; - rcu_assign_pointer(comment->c, c); -} - -/* Used only when dumping a set, protected by rcu_read_lock() */ -static inline int -ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) -{ - struct ip_set_comment_rcu *c = rcu_dereference(comment->c); - - if (!c) - return 0; - return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str); -} - -/* Called from uadd/udel, flush or the garbage collectors protected - * by the set spinlock. - * Called when the set is destroyed and when there can't be any user - * of the set data anymore. - */ -static inline void -ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment) -{ - struct ip_set_comment_rcu *c; - - c = rcu_dereference_protected(comment->c, 1); - if (unlikely(!c)) - return; - set->ext_size -= sizeof(*c) + strlen(c->str) + 1; - kfree_rcu(c, rcu); - rcu_assign_pointer(comment->c, NULL); -} +void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, + const struct ip_set_ext *ext); static inline void ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) -- cgit v1.2.3 From 2398a97688f1aaca09d0a5a809f361e2abf5ff3c Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 3 Oct 2019 20:56:04 +0100 Subject: netfilter: ipset: move functions to ip_set_core.c. Several inline functions in ip_set.h are only called in ip_set_core.c: move them and remove inline function specifier. Signed-off-by: Jeremy Sowden Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 102 --------------------------------- 1 file changed, 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 985c9bb1ab65..44f6de8a1733 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -508,86 +508,9 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) *timeout = t; } -static inline u32 -ip_set_timeout_get(const unsigned long *timeout) -{ - u32 t; - - if (*timeout == IPSET_ELEM_PERMANENT) - return 0; - - t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; - /* Zero value in userspace means no timeout */ - return t == 0 ? 1 : t; -} - void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext); -static inline void -ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) -{ - atomic64_add((long long)bytes, &(counter)->bytes); -} - -static inline void -ip_set_add_packets(u64 packets, struct ip_set_counter *counter) -{ - atomic64_add((long long)packets, &(counter)->packets); -} - -static inline u64 -ip_set_get_bytes(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->bytes); -} - -static inline u64 -ip_set_get_packets(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->packets); -} - -static inline bool -ip_set_match_counter(u64 counter, u64 match, u8 op) -{ - switch (op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == match; - case IPSET_COUNTER_NE: - return counter != match; - case IPSET_COUNTER_LT: - return counter < match; - case IPSET_COUNTER_GT: - return counter > match; - } - return false; -} - -static inline void -ip_set_update_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext, u32 flags) -{ - if (ext->packets != ULLONG_MAX && - !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { - ip_set_add_bytes(ext->bytes, counter); - ip_set_add_packets(ext->packets, counter); - } -} - -static inline bool -ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) -{ - return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - static inline void ip_set_init_counter(struct ip_set_counter *counter, const struct ip_set_ext *ext) @@ -598,31 +521,6 @@ ip_set_init_counter(struct ip_set_counter *counter, atomic64_set(&(counter)->packets, (long long)(ext->packets)); } -static inline void -ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - mext->skbinfo = *skbinfo; -} - -static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) -{ - /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && - nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask), - IPSET_ATTR_PAD)) || - (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); -} - static inline void ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext) -- cgit v1.2.3 From 856391854ce73015fbe2b235f5886205aab166b0 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 3 Oct 2019 20:56:05 +0100 Subject: netfilter: ipset: make ip_set_put_flags extern. ip_set_put_flags is rather large for a static inline function in a header-file. Move it to ip_set_core.c and export it. Signed-off-by: Jeremy Sowden Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 44f6de8a1733..4d8b1eaf7708 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -276,28 +276,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data) } } -static inline int -ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) -{ - u32 cadt_flags = 0; - - if (SET_WITH_TIMEOUT(set)) - if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(set->timeout)))) - return -EMSGSIZE; - if (SET_WITH_COUNTER(set)) - cadt_flags |= IPSET_FLAG_WITH_COUNTERS; - if (SET_WITH_COMMENT(set)) - cadt_flags |= IPSET_FLAG_WITH_COMMENT; - if (SET_WITH_SKBINFO(set)) - cadt_flags |= IPSET_FLAG_WITH_SKBINFO; - if (SET_WITH_FORCEADD(set)) - cadt_flags |= IPSET_FLAG_WITH_FORCEADD; - - if (!cadt_flags) - return 0; - return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); -} +int ip_set_put_flags(struct sk_buff *skb, struct ip_set *set); /* Netlink CB args */ enum { -- cgit v1.2.3 From 3fbd6c4513b5c27465a1dcf2e4286e6c3183bb1f Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 3 Oct 2019 20:56:06 +0100 Subject: netfilter: ipset: move function to ip_set_bitmap_ip.c. One inline function in ip_set_bitmap.h is only called in ip_set_bitmap_ip.c: move it and remove inline function specifier. Signed-off-by: Jeremy Sowden Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_bitmap.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 2dddbc6dcac7..fcc4d214a788 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -12,18 +12,4 @@ enum { IPSET_ADD_START_STORED_TIMEOUT, }; -/* Common functions */ - -static inline u32 -range_to_mask(u32 from, u32 to, u8 *bits) -{ - u32 mask = 0xFFFFFFFE; - - *bits = 32; - while (--(*bits) > 0 && mask && (to & mask) != from) - mask <<= 1; - - return mask; -} - #endif /* __IP_SET_BITMAP_H */ -- cgit v1.2.3 From f8615bf8a3dabd84bf844c6f888929495039d389 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 3 Oct 2019 20:56:07 +0100 Subject: netfilter: ipset: move ip_set_get_ip_port() to ip_set_bitmap_port.c. ip_set_get_ip_port() is only used in ip_set_bitmap_port.c. Move it there and make it static. Signed-off-by: Jeremy Sowden Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set_getport.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index d74cd112b88a..1ecaabd9a048 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -20,9 +20,6 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, } #endif -extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, - __be16 *port); - static inline bool ip_set_proto_with_ports(u8 proto) { switch (proto) { -- cgit v1.2.3 From 7d47433cf74f942a414171867d89c08640cfef45 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 7 Oct 2019 16:59:31 +0300 Subject: net/mlx5: Expose optimal performance scatter entries capability Expose maximum scatter entries per RDMA READ for optimal performance. Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Reviewed-by: Christoph Hellwig Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 138c50d5a353..c0bfb1d90dd2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1153,7 +1153,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_srq[0x5]; u8 reserved_at_b0[0x10]; - u8 reserved_at_c0[0x8]; + u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; u8 reserved_at_d0[0xb]; u8 log_max_cq[0x5]; -- cgit v1.2.3 From 79591b7db21d255db158afaa48c557dcab631a1c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 5 Sep 2019 04:01:12 +0300 Subject: spi: Add a PTP system timestamp to the transfer structure SPI is one of the interfaces used to access devices which have a POSIX clock driver (real time clocks, 1588 timers etc). The fact that the SPI bus is slow is not what the main problem is, but rather the fact that drivers don't take a constant amount of time in transferring data over SPI. When there is a high delay in the readout of time, there will be uncertainty in the value that has been read out of the peripheral. When that delay is constant, the uncertainty can at least be approximated with a certain accuracy which is fine more often than not. Timing jitter occurs all over in the kernel code, and is mainly caused by having to let go of the CPU for various reasons such as preemption, servicing interrupts, going to sleep, etc. Another major reason is CPU dynamic frequency scaling. It turns out that the problem of retrieving time from a SPI peripheral with high accuracy can be solved by the use of "PTP system timestamping" - a mechanism to correlate the time when the device has snapshotted its internal time counter with the Linux system time at that same moment. This is sufficient for having a precise time measurement - it is not necessary for the whole SPI transfer to be transmitted "as fast as possible", or "as low-jitter as possible". The system has to be low-jitter for a very short amount of time to be effective. This patch introduces a PTP system timestamping mechanism in struct spi_transfer. This is to be used by SPI device drivers when they need to know the exact time at which the underlying device's time was snapshotted. More often than not, SPI peripherals have a very exact timing for when their SPI-to-interconnect bridge issues a transaction for snapshotting and reading the time register, and that will be dependent on when the SPI-to-interconnect bridge figures out that this is what it should do, aka as soon as it sees byte N of the SPI transfer. Since spi_device drivers are the ones who'd know best how the peripheral behaves in this regard, expose a mechanism in spi_transfer which allows them to specify which word (or word range) from the transfer should be timestamped. Add a default implementation of the PTP system timestamping in the SPI core. This is not going to be satisfactory performance-wise, but should at least increase the likelihood that SPI device drivers will use PTP system timestamping in the future. There are 3 entry points from the core towards the SPI controller drivers: - transfer_one: The driver is passed individual spi_transfers to execute. This is the easiest to timestamp. - transfer_one_message: The core passes the driver an entire spi_message (a potential batch of spi_transfers). The core puts the same pre and post timestamp to all transfers within a message. This is not ideal, but nothing better can be done by default anyway, since the core has no insight into how the driver batches the transfers. - transfer: Like transfer_one_message, but for unqueued drivers (i.e. the driver implements its own queue scheduling). Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20190905010114.26718-3-olteanv@gmail.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index af4f265d0f67..27f6b046cf92 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -13,6 +13,7 @@ #include #include #include +#include struct dma_chan; struct property_entry; @@ -409,6 +410,12 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. + * @ptp_sts_supported: If the driver sets this to true, it must provide a + * time snapshot in @spi_transfer->ptp_sts as close as possible to the + * moment in time when @spi_transfer->ptp_sts_word_pre and + * @spi_transfer->ptp_sts_word_post were transmitted. + * If the driver does not set this, the SPI core takes the snapshot as + * close to the driver hand-over as possible. * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -604,6 +611,15 @@ struct spi_controller { void *dummy_tx; int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs); + + /* + * Driver sets this field to indicate it is able to snapshot SPI + * transfers (needed e.g. for reading the time of POSIX clocks) + */ + bool ptp_sts_supported; + + /* Interrupt enable state during PTP system timestamping */ + unsigned long irq_flags; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) @@ -644,6 +660,14 @@ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ct extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); +/* Helper calls for driver to timestamp transfer */ +void spi_take_timestamp_pre(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); +void spi_take_timestamp_post(struct spi_controller *ctlr, + struct spi_transfer *xfer, + const void *tx, bool irqs_off); + /* the spi driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); @@ -753,6 +777,35 @@ extern void spi_res_release(struct spi_controller *ctlr, * @transfer_list: transfers are sequenced through @spi_message.transfers * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use + * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset + * within @tx_buf for which the SPI device is requesting that the time + * snapshot for this transfer begins. Upon completing the SPI transfer, + * this value may have changed compared to what was requested, depending + * on the available snapshotting resolution (DMA transfer, + * @ptp_sts_supported is false, etc). + * @ptp_sts_word_post: See @ptp_sts_word_post. The two can be equal (meaning + * that a single byte should be snapshotted). + * If the core takes care of the timestamp (if @ptp_sts_supported is false + * for this controller), it will set @ptp_sts_word_pre to 0, and + * @ptp_sts_word_post to the length of the transfer. This is done + * purposefully (instead of setting to spi_transfer->len - 1) to denote + * that a transfer-level snapshot taken from within the driver may still + * be of higher quality. + * @ptp_sts: Pointer to a memory location held by the SPI slave device where a + * PTP system timestamp structure may lie. If drivers use PIO or their + * hardware has some sort of assist for retrieving exact transfer timing, + * they can (and should) assert @ptp_sts_supported and populate this + * structure using the ptp_read_system_*ts helper functions. + * The timestamp must represent the time at which the SPI slave device has + * processed the word, i.e. the "pre" timestamp should be taken before + * transmitting the "pre" word, and the "post" timestamp after receiving + * transmit confirmation from the controller for the "post" word. + * @timestamped_pre: Set by the SPI controller driver to denote it has acted + * upon the @ptp_sts request. Not set when the SPI core has taken care of + * the task. SPI device drivers are free to print a warning if this comes + * back unset and they need the better resolution. + * @timestamped_post: See above. The reason why both exist is that these + * booleans are also used to keep state in the core SPI logic. * * SPI transfers always write the same number of bytes as they read. * Protocol drivers should always provide @rx_buf and/or @tx_buf. @@ -842,6 +895,14 @@ struct spi_transfer { u32 effective_speed_hz; + unsigned int ptp_sts_word_pre; + unsigned int ptp_sts_word_post; + + struct ptp_system_timestamp *ptp_sts; + + bool timestamped_pre; + bool timestamped_post; + struct list_head transfer_list; }; -- cgit v1.2.3 From 3a9ac959ba2825a3a6235bc909d369cc30386e9e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 5 Sep 2019 11:44:24 +0100 Subject: of: Remove unused of_find_matching_node_by_address() of_find_matching_node_by_address() is unused, so remove it. Cc: Robin Murphy Reviewed-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Tested-by: Nicolas Saenz Julienne Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Rob Herring --- include/linux/of_address.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 30e40fb6936b..e317f375374a 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -33,10 +33,6 @@ extern u64 of_translate_dma_address(struct device_node *dev, extern u64 of_translate_address(struct device_node *np, const __be32 *addr); extern int of_address_to_resource(struct device_node *dev, int index, struct resource *r); -extern struct device_node *of_find_matching_node_by_address( - struct device_node *from, - const struct of_device_id *matches, - u64 base_address); extern void __iomem *of_iomap(struct device_node *device, int index); void __iomem *of_io_request_and_map(struct device_node *device, int index, const char *name); @@ -71,14 +67,6 @@ static inline u64 of_translate_address(struct device_node *np, return OF_BAD_ADDR; } -static inline struct device_node *of_find_matching_node_by_address( - struct device_node *from, - const struct of_device_id *matches, - u64 base_address) -{ - return NULL; -} - static inline const __be32 *of_get_address(struct device_node *dev, int index, u64 *size, unsigned int *flags) { -- cgit v1.2.3 From 6e6faf63744333373db8bc64aea52dab86cbf0bc Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 5 Sep 2019 11:53:27 +0100 Subject: of: Make of_dma_get_range() private of_dma_get_range() is only used within the DT core code, so remove the export and move the header declaration to the private header. Cc: Robin Murphy Reviewed-by: Geert Uytterhoeven Tested-by: Nicolas Saenz Julienne Reviewed-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Signed-off-by: Rob Herring --- include/linux/of_address.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index e317f375374a..ddda3936039c 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -51,8 +51,6 @@ extern int of_pci_dma_range_parser_init(struct of_pci_range_parser *parser, extern struct of_pci_range *of_pci_range_parser_one( struct of_pci_range_parser *parser, struct of_pci_range *range); -extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size); extern bool of_dma_is_coherent(struct device_node *np); #else /* CONFIG_OF_ADDRESS */ static inline void __iomem *of_io_request_and_map(struct device_node *device, @@ -92,12 +90,6 @@ static inline struct of_pci_range *of_pci_range_parser_one( return NULL; } -static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size) -{ - return -ENODEV; -} - static inline bool of_dma_is_coherent(struct device_node *np) { return false; -- cgit v1.2.3 From 645c138636de3d6d6ed7d92edec39298fd6873d7 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 5 Sep 2019 10:47:26 +0100 Subject: of/address: Fix of_pci_range_parser_one translation of DMA addresses of_pci_range_parser_one() has a bug when parsing dma-ranges. When it translates the parent address (aka cpu address in the code), 'ranges' is always being used. This happens to work because most users are just 1:1 translation. Cc: Robin Murphy Tested-by: Nicolas Saenz Julienne Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Rob Herring --- include/linux/of_address.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index ddda3936039c..eac7ab109df4 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -12,6 +12,7 @@ struct of_pci_range_parser { const __be32 *end; int np; int pna; + bool dma; }; struct of_pci_range { -- cgit v1.2.3 From 89da2ba947b1080199f4a6413686569a75fc2e7d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 8 Oct 2019 15:16:14 +0800 Subject: soc: qcom: Fix llcc-qcom definitions to include commit 99356b03b431 ("soc: qcom: Make llcc-qcom a generic driver") move these out of llcc-qcom.h, make the building fails: drivers/edac/qcom_edac.c:86:40: error: array type has incomplete element type struct llcc_edac_reg_data static const struct llcc_edac_reg_data edac_reg_data[] = { ^~~~~~~~~~~~~ drivers/edac/qcom_edac.c:87:3: error: array index in non-array initializer [LLCC_DRAM_CE] = { ^~~~~~~~~~~~ drivers/edac/qcom_edac.c:87:3: note: (near initialization for edac_reg_data) drivers/edac/qcom_edac.c:88:3: error: field name not in record or union initializer .name = "DRAM Single-bit", ... drivers/edac/qcom_edac.c:169:51: warning: struct llcc_drv_data declared inside parameter list will not be visible outside of this definition or declaration qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) ^~~~~~~~~~~~~ This patch move the needed definitions back to include. Reported-by: Hulk Robot Fixes: 99356b03b431 ("soc: qcom: Make llcc-qcom a generic driver") Signed-off-by: YueHaibing Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index c0acdb28fde8..90b864655822 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -37,6 +37,56 @@ struct llcc_slice_desc { size_t slice_size; }; +/** + * llcc_edac_reg_data - llcc edac registers data for each error type + * @name: Name of the error + * @synd_reg: Syndrome register address + * @count_status_reg: Status register address to read the error count + * @ways_status_reg: Status register address to read the error ways + * @reg_cnt: Number of registers + * @count_mask: Mask value to get the error count + * @ways_mask: Mask value to get the error ways + * @count_shift: Shift value to get the error count + * @ways_shift: Shift value to get the error ways + */ +struct llcc_edac_reg_data { + char *name; + u64 synd_reg; + u64 count_status_reg; + u64 ways_status_reg; + u32 reg_cnt; + u32 count_mask; + u32 ways_mask; + u8 count_shift; + u8 ways_shift; +}; + +/** + * llcc_drv_data - Data associated with the llcc driver + * @regmap: regmap associated with the llcc device + * @bcast_regmap: regmap associated with llcc broadcast offset + * @cfg: pointer to the data structure for slice configuration + * @lock: mutex associated with each slice + * @cfg_size: size of the config data table + * @max_slices: max slices as read from device tree + * @num_banks: Number of llcc banks + * @bitmap: Bit map to track the active slice ids + * @offsets: Pointer to the bank offsets array + * @ecc_irq: interrupt for llcc cache error detection and reporting + */ +struct llcc_drv_data { + struct regmap *regmap; + struct regmap *bcast_regmap; + const struct llcc_slice_config *cfg; + struct mutex lock; + u32 cfg_size; + u32 max_slices; + u32 num_banks; + unsigned long *bitmap; + u32 *offsets; + int ecc_irq; +}; + #if IS_ENABLED(CONFIG_QCOM_LLCC) /** * llcc_slice_getd - get llcc slice descriptor -- cgit v1.2.3 From bacb7e1855969bba78b32302453d2cc8ba0bc403 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Oct 2019 14:20:34 -0700 Subject: Revert "tun: call dev_get_valid_name() before register_netdevice()" This reverts commit 0ad646c81b2182f7fa67ec0c8c825e0ee165696d. As noticed by Jakub, this is no longer needed after commit 11fc7d5a0a2d ("tun: fix memory leak in error path") This no longer exports dev_get_valid_name() for the exclusive use of tun driver. Suggested-by: Jakub Kicinski Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fe45b2c72315..3207e0b9ec4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4113,9 +4113,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); -int dev_get_valid_name(struct net *net, struct net_device *dev, - const char *name); - #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) -- cgit v1.2.3 From f83eeb1a01689b2691f6f56629ac9f66de8d41c2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 3 Oct 2019 18:17:44 +0200 Subject: sched/cputime: Rename vtime_account_system() to vtime_account_kernel() vtime_account_system() decides if we need to account the time to the system (__vtime_account_system()) or to the guest (vtime_account_guest()). So this function is a misnomer as we are on a higher level than "system". All we know when we call that function is that we are accounting kernel cputime. Whether it belongs to guest or system time is a lower level detail. Rename this function to vtime_account_kernel(). This will clarify things and avoid too many underscored vtime_account_system() versions. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191003161745.28464-2-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking.h | 4 ++-- include/linux/vtime.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d05609ad329d..558a209c247d 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void) * to assume that it's the stime pending cputime * to flush. */ - vtime_account_system(current); + vtime_account_kernel(current); current->flags |= PF_VCPU; rcu_virt_note_context_switch(smp_processor_id()); } @@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void) static inline void guest_exit_irqoff(void) { /* Flush the guest cputime we spent on the guest */ - vtime_account_system(current); + vtime_account_kernel(current); current->flags &= ~PF_VCPU; } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ diff --git a/include/linux/vtime.h b/include/linux/vtime.h index a26ed10a4eac..2fd247f90408 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -57,13 +57,13 @@ static inline void vtime_task_switch(struct task_struct *prev) } #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ -extern void vtime_account_system(struct task_struct *tsk); +extern void vtime_account_kernel(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } -static inline void vtime_account_system(struct task_struct *tsk) { } +static inline void vtime_account_kernel(struct task_struct *tsk) { } #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN @@ -86,7 +86,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk); static inline void vtime_account_irq_exit(struct task_struct *tsk) { /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_system(tsk); + vtime_account_kernel(tsk); } extern void vtime_flush(struct task_struct *tsk); #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -- cgit v1.2.3 From 8d495477d62e4397207f22a432fcaa86d9f2bc2d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 3 Oct 2019 18:17:45 +0200 Subject: sched/cputime: Spare a seqcount lock/unlock cycle on context switch On context switch we are locking the vtime seqcount of the scheduling-out task twice: * On vtime_task_switch_common(), when we flush the pending vtime through vtime_account_system() * On arch_vtime_task_switch() to reset the vtime state. This is pointless as these actions can be performed without the need to unlock/lock in the middle. The reason these steps are separated is to consolidate a very small amount of common code between CONFIG_VIRT_CPU_ACCOUNTING_GEN and CONFIG_VIRT_CPU_ACCOUNTING_NATIVE. Performance in this fast path is definitely a priority over artificial code factorization so split the task switch code between GEN and NATIVE and mutualize the parts than can run under a single seqcount locked block. As a side effect, vtime_account_idle() becomes included in the seqcount protection. This happens to be a welcome preparation in order to properly support kcpustat under vtime in the future and fetch CPUTIME_IDLE without race. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191003161745.28464-3-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/vtime.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 2fd247f90408..d9160ab3667a 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -14,8 +14,12 @@ struct task_struct; * vtime_accounting_cpu_enabled() definitions/declarations */ #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) + static inline bool vtime_accounting_cpu_enabled(void) { return true; } +extern void vtime_task_switch(struct task_struct *prev); + #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN) + /* * Checks if vtime is enabled on some CPU. Cputime readers want to be careful * in that case and compute the tickless cputime. @@ -36,33 +40,29 @@ static inline bool vtime_accounting_cpu_enabled(void) return false; } + +extern void vtime_task_switch_generic(struct task_struct *prev); + +static inline void vtime_task_switch(struct task_struct *prev) +{ + if (vtime_accounting_cpu_enabled()) + vtime_task_switch_generic(prev); +} + #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ + static inline bool vtime_accounting_cpu_enabled(void) { return false; } -#endif +static inline void vtime_task_switch(struct task_struct *prev) { } +#endif /* * Common vtime APIs */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING - -#ifdef __ARCH_HAS_VTIME_TASK_SWITCH -extern void vtime_task_switch(struct task_struct *prev); -#else -extern void vtime_common_task_switch(struct task_struct *prev); -static inline void vtime_task_switch(struct task_struct *prev) -{ - if (vtime_accounting_cpu_enabled()) - vtime_common_task_switch(prev); -} -#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ - extern void vtime_account_kernel(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); - #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ - -static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_kernel(struct task_struct *tsk) { } #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ -- cgit v1.2.3 From 5facae4f3549b5cf7c0e10ec312a65ffd43b5726 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 19 Sep 2019 12:09:40 -0400 Subject: locking/lockdep: Remove unused @nested argument from lock_release() Since the following commit: b4adfe8e05f1 ("locking/lockdep: Remove unused argument in __lock_release") @nested is no longer used in lock_release(), so remove it from all lock_release() calls and friends. Signed-off-by: Qian Cai Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Acked-by: Daniel Vetter Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: alexander.levin@microsoft.com Cc: daniel@iogearbox.net Cc: davem@davemloft.net Cc: dri-devel@lists.freedesktop.org Cc: duyuyang@gmail.com Cc: gregkh@linuxfoundation.org Cc: hannes@cmpxchg.org Cc: intel-gfx@lists.freedesktop.org Cc: jack@suse.com Cc: jlbec@evilplan.or Cc: joonas.lahtinen@linux.intel.com Cc: joseph.qi@linux.alibaba.com Cc: jslaby@suse.com Cc: juri.lelli@redhat.com Cc: maarten.lankhorst@linux.intel.com Cc: mark@fasheh.com Cc: mhocko@kernel.org Cc: mripard@kernel.org Cc: ocfs2-devel@oss.oracle.com Cc: rodrigo.vivi@intel.com Cc: sean@poorly.run Cc: st@kernel.org Cc: tj@kernel.org Cc: tytso@mit.edu Cc: vdavydov.dev@gmail.com Cc: vincent.guittot@linaro.org Cc: viro@zeniv.linux.org.uk Link: https://lkml.kernel.org/r/1568909380-32199-1-git-send-email-cai@lca.pw Signed-off-by: Ingo Molnar --- include/linux/jbd2.h | 2 +- include/linux/lockdep.h | 21 ++++++++++----------- include/linux/percpu-rwsem.h | 4 ++-- include/linux/rcupdate.h | 2 +- include/linux/rwlock_api_smp.h | 16 ++++++++-------- include/linux/seqlock.h | 4 ++-- include/linux/spinlock_api_smp.h | 8 ++++---- include/linux/ww_mutex.h | 2 +- 8 files changed, 29 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 603fbc4e2f70..564793c24d12 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1170,7 +1170,7 @@ struct journal_s #define jbd2_might_wait_for_commit(j) \ do { \ rwsem_acquire(&j->j_trans_commit_map, 0, 0, _THIS_IP_); \ - rwsem_release(&j->j_trans_commit_map, 1, _THIS_IP_); \ + rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \ } while (0) /* journal feature predicate functions */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b8a835fd611b..c50d01ef1414 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -349,8 +349,7 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *nest_lock, unsigned long ip); -extern void lock_release(struct lockdep_map *lock, int nested, - unsigned long ip); +extern void lock_release(struct lockdep_map *lock, unsigned long ip); /* * Same "read" as for lock_acquire(), except -1 means any. @@ -428,7 +427,7 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) } # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) -# define lock_release(l, n, i) do { } while (0) +# define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) @@ -591,42 +590,42 @@ static inline void print_irqtrace_events(struct task_struct *curr) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) -#define spin_release(l, n, i) lock_release(l, n, i) +#define spin_release(l, i) lock_release(l, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) -#define rwlock_release(l, n, i) lock_release(l, n, i) +#define rwlock_release(l, i) lock_release(l, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define seqcount_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) -#define seqcount_release(l, n, i) lock_release(l, n, i) +#define seqcount_release(l, i) lock_release(l, i) #define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) -#define mutex_release(l, n, i) lock_release(l, n, i) +#define mutex_release(l, i) lock_release(l, i) #define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i) -#define rwsem_release(l, n, i) lock_release(l, n, i) +#define rwsem_release(l, i) lock_release(l, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) -#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) +#define lock_map_release(l) lock_release(l, _THIS_IP_) #ifdef CONFIG_PROVE_LOCKING # define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ - lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ + lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) # define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ - lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ + lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) #define lockdep_assert_irqs_enabled() do { \ diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 3998cdf9cd14..ad2ca2a89d5b 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -93,7 +93,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem) __percpu_up_read(sem); /* Unconditional memory barrier */ preempt_enable(); - rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); + rwsem_release(&sem->rw_sem.dep_map, _RET_IP_); } extern void percpu_down_write(struct percpu_rw_semaphore *); @@ -118,7 +118,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { - lock_release(&sem->rw_sem.dep_map, 1, ip); + lock_release(&sem->rw_sem.dep_map, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 75a2eded7aa2..269b31eab3d6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -210,7 +210,7 @@ static inline void rcu_lock_acquire(struct lockdep_map *map) static inline void rcu_lock_release(struct lockdep_map *map) { - lock_release(map, 1, _THIS_IP_); + lock_release(map, _THIS_IP_); } extern struct lockdep_map rcu_lock_map; diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 86ebb4bf9c6e..abfb53ab11be 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -215,14 +215,14 @@ static inline void __raw_write_lock(rwlock_t *lock) static inline void __raw_write_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); preempt_enable(); } static inline void __raw_read_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); preempt_enable(); } @@ -230,7 +230,7 @@ static inline void __raw_read_unlock(rwlock_t *lock) static inline void __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -238,7 +238,7 @@ __raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) static inline void __raw_read_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); local_irq_enable(); preempt_enable(); @@ -246,7 +246,7 @@ static inline void __raw_read_unlock_irq(rwlock_t *lock) static inline void __raw_read_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_read_unlock(lock); __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); } @@ -254,7 +254,7 @@ static inline void __raw_read_unlock_bh(rwlock_t *lock) static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -262,7 +262,7 @@ static inline void __raw_write_unlock_irqrestore(rwlock_t *lock, static inline void __raw_write_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); local_irq_enable(); preempt_enable(); @@ -270,7 +270,7 @@ static inline void __raw_write_unlock_irq(rwlock_t *lock) static inline void __raw_write_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); + rwlock_release(&lock->dep_map, _RET_IP_); do_raw_write_unlock(lock); __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); } diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index bcf4cf26b8c8..0491d963d47e 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -79,7 +79,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) local_irq_save(flags); seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); - seqcount_release(&l->dep_map, 1, _RET_IP_); + seqcount_release(&l->dep_map, _RET_IP_); local_irq_restore(flags); } @@ -384,7 +384,7 @@ static inline void write_seqcount_begin(seqcount_t *s) static inline void write_seqcount_end(seqcount_t *s) { - seqcount_release(&s->dep_map, 1, _RET_IP_); + seqcount_release(&s->dep_map, _RET_IP_); raw_write_seqcount_end(s); } diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index b762eaba4cdf..19a9be9d97ee 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -147,7 +147,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) static inline void __raw_spin_unlock(raw_spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); preempt_enable(); } @@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); local_irq_restore(flags); preempt_enable(); @@ -163,7 +163,7 @@ static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock, static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); local_irq_enable(); preempt_enable(); @@ -171,7 +171,7 @@ static inline void __raw_spin_unlock_irq(raw_spinlock_t *lock) static inline void __raw_spin_unlock_bh(raw_spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); + spin_release(&lock->dep_map, _RET_IP_); do_raw_spin_unlock(lock); __local_bh_enable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); } diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 3af7c0e03be5..d7554252404c 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -182,7 +182,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_MUTEXES - mutex_release(&ctx->dep_map, 0, _THIS_IP_); + mutex_release(&ctx->dep_map, _THIS_IP_); DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) -- cgit v1.2.3 From d30cd83f68535ca21412b1abe8684438690c1c2b Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 9 Oct 2019 08:55:38 -0700 Subject: soc: ti: omap-prm: add support for denying idle for reset clockdomain TI SoCs hardware reset signals require the parent clockdomain to be in force wakeup mode while de-asserting the reset, otherwise it may never complete. To support this, add pdata hooks to control the clockdomain directly. Signed-off-by: Tero Kristo Reviewed-by: Tony Lindgren Signed-off-by: Santosh Shilimkar --- include/linux/platform_data/ti-prm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/platform_data/ti-prm.h (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-prm.h b/include/linux/platform_data/ti-prm.h new file mode 100644 index 000000000000..28154c3226c2 --- /dev/null +++ b/include/linux/platform_data/ti-prm.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * TI PRM (Power & Reset Manager) platform data + * + * Copyright (C) 2019 Texas Instruments, Inc. + * + * Tero Kristo + */ + +#ifndef _LINUX_PLATFORM_DATA_TI_PRM_H +#define _LINUX_PLATFORM_DATA_TI_PRM_H + +struct clockdomain; + +struct ti_prm_platform_data { + void (*clkdm_deny_idle)(struct clockdomain *clkdm); + void (*clkdm_allow_idle)(struct clockdomain *clkdm); + struct clockdomain * (*clkdm_lookup)(const char *name); +}; + +#endif /* _LINUX_PLATFORM_DATA_TI_PRM_H */ -- cgit v1.2.3 From f9bdad8ca8a40270576dd8751ac225febaa87f93 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 29 Oct 2018 13:23:40 -0400 Subject: NFS NFSD: defining nl4_servers structure needed by both These structures are needed by COPY_NOTIFY on the client and needed by the nfsd as well Reviewed-by: Jeff Layton Signed-off-by: Olga Kornievskaia --- include/linux/nfs4.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index fd59904a282c..5810e248c1bd 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -16,6 +16,7 @@ #include #include #include +#include enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, @@ -674,4 +675,27 @@ struct nfs4_op_map { } u; }; +struct nfs42_netaddr { + char netid[RPCBIND_MAXNETIDLEN]; + char addr[RPCBIND_MAXUADDRLEN + 1]; + u32 netid_len; + u32 addr_len; +}; + +enum netloc_type4 { + NL4_NAME = 1, + NL4_URL = 2, + NL4_NETADDR = 3, +}; + +struct nl4_server { + enum netloc_type4 nl4_type; + union { + struct { /* NL4_NAME, NL4_URL */ + int nl4_str_sz; + char nl4_str[NFS4_OPAQUE_LIMIT + 1]; + }; + struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ + } u; +}; #endif -- cgit v1.2.3 From 0491567b51efeca807da1125a1a0d5193875e286 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Jun 2019 16:14:30 -0400 Subject: NFS: add COPY_NOTIFY operation Try using the delegation stateid, then the open stateid. Only NL4_NETATTR, No support for NL4_NAME and NL4_URL. Allow only one source server address to be returned for now. To distinguish between same server copy offload ("intra") and a copy between different server ("inter"), do a check of server owner identity and also make sure server is capable of doing a copy offload. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5810e248c1bd..5e7a5261af4e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -537,6 +537,7 @@ enum { NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_OFFLOAD_CANCEL, + NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a87fe854f008..e1c8748e1e82 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -276,5 +276,6 @@ struct nfs_server { #define NFS_CAP_COPY (1U << 24) #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) #define NFS_CAP_LAYOUTERROR (1U << 26) +#define NFS_CAP_COPY_NOTIFY (1U << 27) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b8324ec08f3..0a7af40026d7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1463,6 +1463,22 @@ struct nfs42_offload_status_res { int osr_status; }; +struct nfs42_copy_notify_args { + struct nfs4_sequence_args cna_seq_args; + + struct nfs_fh *cna_src_fh; + nfs4_stateid cna_src_stateid; + struct nl4_server cna_dst; +}; + +struct nfs42_copy_notify_res { + struct nfs4_sequence_res cnr_seq_res; + + struct nfstime4 cnr_lease_time; + nfs4_stateid cnr_stateid; + struct nl4_server cnr_src; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; -- cgit v1.2.3 From 1d38f3f0d70008671f4dc055697ff3c3bb44a284 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Jun 2019 11:54:18 -0400 Subject: NFS: add ca_source_server<> to COPY Support only one source server address: the same address that the client and source server use. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0a7af40026d7..008facac8a30 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1435,6 +1435,7 @@ struct nfs42_copy_args { u64 count; bool sync; + struct nl4_server *cp_src; }; struct nfs42_write_res { -- cgit v1.2.3 From 0e65a32c8a569db363048e17a708b1a0913adbef Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 14 Jun 2019 14:38:40 -0400 Subject: NFS: handle source server reboot When the source server reboots after a server-to-server copy was issued, we need to retry the copy from COPY_NOTIFY. We need to detect that the source server rebooted and there is a copy waiting on a destination server and wake it up. Signed-off-by: Olga Kornievskaia --- include/linux/nfs_fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 570a60c2f4f4..c06b1fd130f3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -189,13 +189,15 @@ struct nfs_inode { struct nfs4_copy_state { struct list_head copies; + struct list_head src_copies; nfs4_stateid stateid; struct completion completion; uint64_t count; struct nfs_writeverf verf; int error; int flags; - struct nfs4_state *parent_state; + struct nfs4_state *parent_src_state; + struct nfs4_state *parent_dst_state; }; /* -- cgit v1.2.3 From 0d3c8501e32e19c9bdfc65e5d4e17e7772729332 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 8 Oct 2019 10:13:55 -0700 Subject: Input: pixcir_i2c_ts - move definitions into a single file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the defined symbols from linux/platform_data/pixcir_i2c_ts.h are only used by the pixcir_i2c_ts driver, so move all the definitions locally and get rid of the pixcir_i2c_ts.h file. Signed-off-by: Fabio Estevam Reviewed-by: Roger Quadros Tested-by: Michal Vokáč Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/pixcir_i2c_ts.h | 64 ----------------------------- 1 file changed, 64 deletions(-) delete mode 100644 include/linux/platform_data/pixcir_i2c_ts.h (limited to 'include/linux') diff --git a/include/linux/platform_data/pixcir_i2c_ts.h b/include/linux/platform_data/pixcir_i2c_ts.h deleted file mode 100644 index 4ab3cd6f1cc2..000000000000 --- a/include/linux/platform_data/pixcir_i2c_ts.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PIXCIR_I2C_TS_H -#define _PIXCIR_I2C_TS_H - -/* - * Register map - */ -#define PIXCIR_REG_POWER_MODE 51 -#define PIXCIR_REG_INT_MODE 52 - -/* - * Power modes: - * active: max scan speed - * idle: lower scan speed with automatic transition to active on touch - * halt: datasheet says sleep but this is more like halt as the chip - * clocks are cut and it can only be brought out of this mode - * using the RESET pin. - */ -enum pixcir_power_mode { - PIXCIR_POWER_ACTIVE, - PIXCIR_POWER_IDLE, - PIXCIR_POWER_HALT, -}; - -#define PIXCIR_POWER_MODE_MASK 0x03 -#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2) - -/* - * Interrupt modes: - * periodical: interrupt is asserted periodicaly - * diff coordinates: interrupt is asserted when coordinates change - * level on touch: interrupt level asserted during touch - * pulse on touch: interrupt pulse asserted druing touch - * - */ -enum pixcir_int_mode { - PIXCIR_INT_PERIODICAL, - PIXCIR_INT_DIFF_COORD, - PIXCIR_INT_LEVEL_TOUCH, - PIXCIR_INT_PULSE_TOUCH, -}; - -#define PIXCIR_INT_MODE_MASK 0x03 -#define PIXCIR_INT_ENABLE (1UL << 3) -#define PIXCIR_INT_POL_HIGH (1UL << 2) - -/** - * struct pixcir_irc_chip_data - chip related data - * @max_fingers: Max number of fingers reported simultaneously by h/w - * @has_hw_ids: Hardware supports finger tracking IDs - * - */ -struct pixcir_i2c_chip_data { - u8 max_fingers; - bool has_hw_ids; -}; - -struct pixcir_ts_platform_data { - int x_max; - int y_max; - struct pixcir_i2c_chip_data chip; -}; - -#endif -- cgit v1.2.3 From 690a6ca7df3de7b90546bc10a620d1ac8ccaa1a1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Oct 2019 21:03:14 -0700 Subject: DIM: fix dim.h kernel-doc and headers Lots of fixes to kernel-doc in structs, enums, and functions. Also add header files that are being used but not yet #included. Signed-off-by: Randy Dunlap Cc: Yamin Friedman Cc: Tal Gilboa Cc: Saeed Mahameed Cc: Doug Ledford Cc: Jason Gunthorpe Cc: linux-rdma@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 63 ++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dim.h b/include/linux/dim.h index 9fa4b3f88c39..b698266d0035 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -4,22 +4,26 @@ #ifndef DIM_H #define DIM_H +#include +#include #include +#include +#include -/** +/* * Number of events between DIM iterations. * Causes a moderation of the algorithm run. */ #define DIM_NEVENTS 64 -/** +/* * Is a difference between values justifies taking an action. * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ (((100UL * abs((val) - (ref))) / (ref)) > 10) -/** +/* * Calculate the gap between two values. * Take wrap-around and variable size into consideration. */ @@ -27,12 +31,13 @@ & (BIT_ULL(bits) - 1)) /** - * Structure for CQ moderation values. + * struct dim_cq_moder - Structure for CQ moderation values. * Used for communications between DIM and its consumer. * * @usec: CQ timer suggestion (by DIM) * @pkts: CQ packet counter suggestion (by DIM) - * @cq_period_mode: CQ priod count mode (from CQE/EQE) + * @comps: Completion counter + * @cq_period_mode: CQ period count mode (from CQE/EQE) */ struct dim_cq_moder { u16 usec; @@ -42,13 +47,14 @@ struct dim_cq_moder { }; /** - * Structure for DIM sample data. + * struct dim_sample - Structure for DIM sample data. * Used for communications between DIM and its consumer. * * @time: Sample timestamp * @pkt_ctr: Number of packets * @byte_ctr: Number of bytes * @event_ctr: Number of events + * @comp_ctr: Current completion counter */ struct dim_sample { ktime_t time; @@ -59,12 +65,14 @@ struct dim_sample { }; /** - * Structure for DIM stats. + * struct dim_stats - Structure for DIM stats. * Used for holding current measured rates. * * @ppms: Packets per msec * @bpms: Bytes per msec * @epms: Events per msec + * @cpms: Completions per msec + * @cpe_ratio: Ratio of completions to events */ struct dim_stats { int ppms; /* packets per msec */ @@ -75,12 +83,13 @@ struct dim_stats { }; /** - * Main structure for dynamic interrupt moderation (DIM). + * struct dim - Main structure for dynamic interrupt moderation (DIM). * Used for holding all information about a specific DIM instance. * * @state: Algorithm state (see below) * @prev_stats: Measured rates from previous iteration (for comparison) * @start_sample: Sampled data at start of current iteration + * @measuring_sample: A &dim_sample that is used to update the current events * @work: Work to perform on action required * @priv: A pointer to the struct that points to dim * @profile_ix: Current moderation profile @@ -106,24 +115,21 @@ struct dim { }; /** - * enum dim_cq_period_mode - * - * These are the modes for CQ period count. + * enum dim_cq_period_mode - Modes for CQ period count * * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset) * @DIM_CQ_PERIOD_NUM_MODES: Number of modes */ -enum { +enum dim_cq_period_mode { DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, DIM_CQ_PERIOD_NUM_MODES }; /** - * enum dim_state + * enum dim_state - DIM algorithm states * - * These are the DIM algorithm states. * These will determine if the algorithm is in a valid state to start an iteration. * * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile) @@ -131,16 +137,15 @@ enum { * need to perform an action * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure */ -enum { +enum dim_state { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS, DIM_APPLY_NEW_PROFILE, }; /** - * enum dim_tune_state + * enum dim_tune_state - DIM algorithm tune states * - * These are the DIM algorithm tune states. * These will determine which action the algorithm should perform. * * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference @@ -148,7 +153,7 @@ enum { * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels */ -enum { +enum dim_tune_state { DIM_PARKING_ON_TOP, DIM_PARKING_TIRED, DIM_GOING_RIGHT, @@ -156,25 +161,23 @@ enum { }; /** - * enum dim_stats_state + * enum dim_stats_state - DIM algorithm statistics states * - * These are the DIM algorithm statistics states. * These will determine the verdict of current iteration. * * @DIM_STATS_WORSE: Current iteration shows worse performance than before - * @DIM_STATS_WORSE: Current iteration shows same performance than before - * @DIM_STATS_WORSE: Current iteration shows better performance than before + * @DIM_STATS_SAME: Current iteration shows same performance than before + * @DIM_STATS_BETTER: Current iteration shows better performance than before */ -enum { +enum dim_stats_state { DIM_STATS_WORSE, DIM_STATS_SAME, DIM_STATS_BETTER, }; /** - * enum dim_step_result + * enum dim_step_result - DIM algorithm step results * - * These are the DIM algorithm step results. * These describe the result of a step. * * @DIM_STEPPED: Performed a regular step @@ -182,7 +185,7 @@ enum { * tired parking * @DIM_ON_EDGE: Stepped to the most left/right profile */ -enum { +enum dim_step_result { DIM_STEPPED, DIM_TOO_TIRED, DIM_ON_EDGE, @@ -199,7 +202,7 @@ enum { bool dim_on_top(struct dim *dim); /** - * dim_turn - change profile alterning direction + * dim_turn - change profile altering direction * @dim: DIM context * * Go left if we were going right and vice-versa. @@ -238,7 +241,7 @@ void dim_calc_stats(struct dim_sample *start, struct dim_sample *end, struct dim_stats *curr_stats); /** - * dim_update_sample - set a sample's fields with give values + * dim_update_sample - set a sample's fields with given values * @event_ctr: number of events to set * @packets: number of packets to set * @bytes: number of bytes to set @@ -304,8 +307,8 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); * @end_sample: Current data measurement * * Called by the consumer. - * This is the main logic of the algorithm, where data is processed in order to decide on next - * required action. + * This is the main logic of the algorithm, where data is processed in order + * to decide on next required action. */ void net_dim(struct dim *dim, struct dim_sample end_sample); -- cgit v1.2.3 From c31e73121f4c1ec45a3e523ac6ce3ce6dafdcec1 Mon Sep 17 00:00:00 2001 From: Murali Nalajala Date: Mon, 7 Oct 2019 13:37:42 -0700 Subject: base: soc: Handle custom soc information sysfs entries Soc framework exposed sysfs entries are not sufficient for some of the h/w platforms. Currently there is no interface where soc drivers can expose further information about their SoCs via soc framework. This change address this limitation where clients can pass their custom entries as attribute group and soc framework would expose them as sysfs properties. Signed-off-by: Murali Nalajala Reviewed-by: Bjorn Andersson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1570480662-25252-1-git-send-email-mnalajal@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- include/linux/sys_soc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h index 48ceea867dd6..d9b3cf0f410c 100644 --- a/include/linux/sys_soc.h +++ b/include/linux/sys_soc.h @@ -15,6 +15,7 @@ struct soc_device_attribute { const char *serial_number; const char *soc_id; const void *data; + const struct attribute_group *custom_attr_group; }; /** -- cgit v1.2.3 From 977da0738f3ba3569b883ed6209c300bfcb695d4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 Sep 2019 14:14:35 +0200 Subject: crypto: inside-secure - Remove #ifdef checks When both PCI and OF are disabled, no drivers are registered, and we get some unused-function warnings: drivers/crypto/inside-secure/safexcel.c:1221:13: error: unused function 'safexcel_unregister_algorithms' [-Werror,-Wunused-function] static void safexcel_unregister_algorithms(struct safexcel_crypto_priv *priv) drivers/crypto/inside-secure/safexcel.c:1307:12: error: unused function 'safexcel_probe_generic' [-Werror,-Wunused-function] static int safexcel_probe_generic(void *pdev, drivers/crypto/inside-secure/safexcel.c:1531:13: error: unused function 'safexcel_hw_reset_rings' [-Werror,-Wunused-function] static void safexcel_hw_reset_rings(struct safexcel_crypto_priv *priv) It's better to make the compiler see what is going on and remove such ifdef checks completely. In case of PCI, this is trivial since pci_register_driver() is defined to an empty function that makes the compiler subsequently drop all unused code silently. The global pcireg_rc/ofreg_rc variables are not actually needed here since the driver registration does not fail in ways that would make it helpful. For CONFIG_OF, an IS_ENABLED() check is still required, since platform drivers can exist both with and without it. A little change to linux/pci.h is needed to ensure that pcim_enable_device() is visible to the driver. Moving the declaration outside of ifdef would be sufficient here, but for consistency with the rest of the file, adding an inline helper is probably best. Fixes: 212ef6f29e5b ("crypto: inside-secure - Fix unused variable warning when CONFIG_PCI=n") Signed-off-by: Arnd Bergmann Acked-by: Bjorn Helgaas # pci.h Signed-off-by: Herbert Xu --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..1a6cf19eac2d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1686,6 +1686,7 @@ static inline struct pci_dev *pci_get_class(unsigned int class, static inline void pci_set_master(struct pci_dev *dev) { } static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } static inline void pci_disable_device(struct pci_dev *dev) { } +static inline int pcim_enable_device(struct pci_dev *pdev) { return -EIO; } static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY; } static inline int __pci_register_driver(struct pci_driver *drv, -- cgit v1.2.3 From fefad9ef58ffc228f7b78b667c2aea8267503350 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 24 Sep 2019 08:44:20 +0200 Subject: seccomp: simplify secure_computing() Afaict, the struct seccomp_data argument to secure_computing() is unused by all current callers. So let's remove it. The argument was added in [1]. It was added because having the arch supply the syscall arguments used to be faster than having it done by secure_computing() (cf. Andy's comment in [2]). This is not true anymore though. /* References */ [1]: 2f275de5d1ed ("seccomp: Add a seccomp_data parameter secure_computing()") [2]: https://lore.kernel.org/r/CALCETrU_fs_At-hTpr231kpaAd0z7xJN4ku-DvzhRU6cvcJA_w@mail.gmail.com Signed-off-by: Christian Brauner Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Will Drewry Cc: Oleg Nesterov Cc: linux-arm-kernel@lists.infradead.org Cc: linux-parisc@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-um@lists.infradead.org Cc: x86@kernel.org Acked-by: Borislav Petkov Acked-by: Andy Lutomirski Link: https://lore.kernel.org/r/20190924064420.6353-1-christian.brauner@ubuntu.com Signed-off-by: Kees Cook --- include/linux/seccomp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 84868d37b35d..03583b6d1416 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -33,10 +33,10 @@ struct seccomp { #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER extern int __secure_computing(const struct seccomp_data *sd); -static inline int secure_computing(const struct seccomp_data *sd) +static inline int secure_computing(void) { if (unlikely(test_thread_flag(TIF_SECCOMP))) - return __secure_computing(sd); + return __secure_computing(NULL); return 0; } #else @@ -59,7 +59,7 @@ struct seccomp { }; struct seccomp_filter { }; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER -static inline int secure_computing(struct seccomp_data *sd) { return 0; } +static inline int secure_computing(void) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif -- cgit v1.2.3 From 56c9aa07942434490890ac35bba99026e66cb949 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 3 Oct 2019 15:32:09 +0300 Subject: software node: Make argument to to_software_node const to_software_node() does not need to modify the fwnode_handle it operates on; therefore make it const. This allows passing a const fwnode_handle to to_software_node(). Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 9b3d4ca3a73a..87d795a1e2d6 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -418,7 +418,8 @@ struct software_node { }; bool is_software_node(const struct fwnode_handle *fwnode); -const struct software_node *to_software_node(struct fwnode_handle *fwnode); +const struct software_node * +to_software_node(const struct fwnode_handle *fwnode); struct fwnode_handle *software_node_fwnode(const struct software_node *node); const struct software_node * -- cgit v1.2.3 From 87e5e95db31a27d117fbb4a5d464f44adb4c2ee2 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 3 Oct 2019 15:32:11 +0300 Subject: device property: Add functions for accessing node's parents Add two convenience functions for accessing node's parents: fwnode_count_parents() returns the number of parent nodes a given node has. fwnode_get_nth_parent() returns node's parent at a given distance from the node itself. Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 87d795a1e2d6..ea27c5811e1b 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -83,6 +83,9 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); +unsigned int fwnode_count_parents(const struct fwnode_handle *fwn); +struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn, + unsigned int depth); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_available_child_node( -- cgit v1.2.3 From bc0500c1e43d95cca5352d2345fb0769f314ba22 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 3 Oct 2019 15:32:12 +0300 Subject: device property: Add fwnode_get_name for returning the name of a node The fwnode framework did not have means to obtain the name of a node. Add that now, in form of the fwnode_get_name() function and a corresponding get_name fwnode op. OF and ACPI support is included. Signed-off-by: Sakari Ailus Acked-by: Rob Herring (for OF) Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 2 ++ include/linux/property.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ababd6bc82f3..2bbf55739a57 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -56,6 +56,7 @@ struct fwnode_reference_args { * otherwise. * @property_read_string_array: Read an array of string properties. Return zero * on success, a negative error code otherwise. + * @get_name: Return the name of an fwnode. * @get_parent: Return the parent of an fwnode. * @get_next_child_node: Return the next child node in an iteration. * @get_named_child_node: Return a child node with a given name. @@ -82,6 +83,7 @@ struct fwnode_operations { (*property_read_string_array)(const struct fwnode_handle *fwnode_handle, const char *propname, const char **val, size_t nval); + const char *(*get_name)(const struct fwnode_handle *fwnode); struct fwnode_handle *(*get_parent)(const struct fwnode_handle *fwnode); struct fwnode_handle * (*get_next_child_node)(const struct fwnode_handle *fwnode, diff --git a/include/linux/property.h b/include/linux/property.h index ea27c5811e1b..afa84c47bf16 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -80,6 +80,7 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *name, unsigned int index); +const char *fwnode_get_name(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); -- cgit v1.2.3 From e7e242bccb209b5f73455b33928b8680cc6e3319 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 3 Oct 2019 15:32:13 +0300 Subject: device property: Add a function to obtain a node's prefix The prefix is used for printing purpose before a node, and it also works as a separator between two nodes. Signed-off-by: Sakari Ailus Acked-by: Rob Herring (for OF) Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 2 ++ include/linux/property.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 2bbf55739a57..a5673c4674cf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -57,6 +57,7 @@ struct fwnode_reference_args { * @property_read_string_array: Read an array of string properties. Return zero * on success, a negative error code otherwise. * @get_name: Return the name of an fwnode. + * @get_name_prefix: Get a prefix for a node (for printing purposes). * @get_parent: Return the parent of an fwnode. * @get_next_child_node: Return the next child node in an iteration. * @get_named_child_node: Return a child node with a given name. @@ -84,6 +85,7 @@ struct fwnode_operations { const char *propname, const char **val, size_t nval); const char *(*get_name)(const struct fwnode_handle *fwnode); + const char *(*get_name_prefix)(const struct fwnode_handle *fwnode); struct fwnode_handle *(*get_parent)(const struct fwnode_handle *fwnode); struct fwnode_handle * (*get_next_child_node)(const struct fwnode_handle *fwnode, diff --git a/include/linux/property.h b/include/linux/property.h index afa84c47bf16..054661109661 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -81,6 +81,7 @@ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, unsigned int index); const char *fwnode_get_name(const struct fwnode_handle *fwnode); +const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_parent( struct fwnode_handle *fwnode); -- cgit v1.2.3 From 8ed61d36050c57d9eba09511f53d683fd63b04d1 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 8 Oct 2019 16:26:06 +0300 Subject: device property: Fix the description of struct fwnode_operations Adding description for the device_is_available member which was missing, and fixing the description of the member property_read_int_array. Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index a5673c4674cf..9d9dc444d787 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -49,11 +49,11 @@ struct fwnode_reference_args { * struct fwnode_operations - Operations for fwnode interface * @get: Get a reference to an fwnode. * @put: Put a reference to an fwnode. + * @device_is_available: Return true if the device is available. * @device_get_match_data: Return the device driver match data. * @property_present: Return true if a property is present. - * @property_read_integer_array: Read an array of integer properties. Return - * zero on success, a negative error code - * otherwise. + * @property_read_int_array: Read an array of integer properties. Return zero on + * success, a negative error code otherwise. * @property_read_string_array: Read an array of string properties. Return zero * on success, a negative error code otherwise. * @get_name: Return the name of an fwnode. -- cgit v1.2.3 From 84a081f60db63aaae3665118203506aa09a7f94f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Oct 2019 11:11:40 -0700 Subject: bpf: Align struct bpf_prog_stats Do not risk spanning these small structures on two cache lines. Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191011181140.2898-1-edumazet@google.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b9d22338606..282e28bf41ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -363,7 +363,7 @@ struct bpf_prog_stats { u64 cnt; u64 nsecs; struct u64_stats_sync syncp; -}; +} __aligned(2 * sizeof(u64)); struct bpf_prog_aux { atomic_t refcnt; -- cgit v1.2.3 From 6b7fe77c334ae59fed9500140e08f4f896b36871 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:40 +0100 Subject: arm/arm64: smccc/psci: add arm_smccc_1_1_get_conduit() SMCCC callers are currently amassing a collection of enums for the SMCCC conduit, and are having to dig into the PSCI driver's internals in order to figure out what to do. Let's clean this up, with common SMCCC_CONDUIT_* definitions, and an arm_smccc_1_1_get_conduit() helper that abstracts the PSCI driver's internal state. We can kill off the PSCI_CONDUIT_* definitions once we've migrated users over to the new interface. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- include/linux/arm-smccc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 080012a6f025..df01a8579034 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -80,6 +80,22 @@ #include #include + +enum arm_smccc_conduit { + SMCCC_CONDUIT_NONE, + SMCCC_CONDUIT_SMC, + SMCCC_CONDUIT_HVC, +}; + +/** + * arm_smccc_1_1_get_conduit() + * + * Returns the conduit to be used for SMCCCv1.1 or later. + * + * When SMCCCv1.1 is not present, returns SMCCC_CONDUIT_NONE. + */ +enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void); + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 -- cgit v1.2.3 From a5520eac4d2dafb7a48c1b0f1c486afcebd6fe0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:43 +0100 Subject: firmware/psci: use common SMCCC_CONDUIT_* Now that we have common SMCCC_CONDUIT_* definitions, migrate the PSCI code over to them, and kill off the old PSCI_CONDUIT_* definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- include/linux/psci.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index e2bacc6fd2f2..ebe0a881d13d 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -7,6 +7,7 @@ #ifndef __LINUX_PSCI_H #define __LINUX_PSCI_H +#include #include #include @@ -18,12 +19,6 @@ bool psci_tos_resident_on(int cpu); int psci_cpu_suspend_enter(u32 state); bool psci_power_state_is_valid(u32 state); -enum psci_conduit { - PSCI_CONDUIT_NONE, - PSCI_CONDUIT_SMC, - PSCI_CONDUIT_HVC, -}; - enum smccc_version { SMCCC_VERSION_1_0, SMCCC_VERSION_1_1, @@ -38,7 +33,7 @@ struct psci_operations { int (*affinity_info)(unsigned long target_affinity, unsigned long lowest_affinity_level); int (*migrate_info_type)(void); - enum psci_conduit conduit; + enum arm_smccc_conduit conduit; enum smccc_version smccc_version; }; -- cgit v1.2.3 From e6ea46511b1ae8c4491904c79411fcd29139af14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:44 +0100 Subject: firmware: arm_sdei: use common SMCCC_CONDUIT_* Now that we have common definitions for SMCCC conduits, move the SDEI code over to them, and remove the SDEI-specific definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: James Morse Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- include/linux/arm_sdei.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 3305ea7f9dc7..0a241c5c911d 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -5,12 +5,6 @@ #include -enum sdei_conduit_types { - CONDUIT_INVALID = 0, - CONDUIT_SMC, - CONDUIT_HVC, -}; - #include #ifdef CONFIG_ARM_SDE_INTERFACE -- cgit v1.2.3 From 734f9246e791d8da278957b2c326d7709b2a97c0 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:46 +0200 Subject: mm: refresh ZONE_DMA and ZONE_DMA32 comments in 'enum zone_type' These zones usage has evolved with time and the comments were outdated. This joins both ZONE_DMA and ZONE_DMA32 explanation and gives up to date examples on how they are used on different architectures. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- include/linux/mmzone.h | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bda20282746b..b0a36d1580b6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -359,33 +359,40 @@ struct per_cpu_nodestat { #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { -#ifdef CONFIG_ZONE_DMA /* - * ZONE_DMA is used when there are devices that are not able - * to do DMA to all of addressable memory (ZONE_NORMAL). Then we - * carve out the portion of memory that is needed for these devices. - * The range is arch specific. + * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able + * to DMA to all of the addressable memory (ZONE_NORMAL). + * On architectures where this area covers the whole 32 bit address + * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller + * DMA addressing constraints. This distinction is important as a 32bit + * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit + * platforms may need both zones as they support peripherals with + * different DMA addressing limitations. + * + * Some examples: + * + * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the + * rest of the lower 4G. + * + * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on + * the specific device. + * + * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the + * lower 4G. * - * Some examples + * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary + * depending on the specific device. * - * Architecture Limit - * --------------------------- - * parisc, ia64, sparc <4G - * s390, powerpc <2G - * arm Various - * alpha Unlimited or 0-16MB. + * - s390 uses ZONE_DMA fixed to the lower 2G. * - * i386, x86_64 and multiple other arches - * <16M. + * - ia64 and riscv only use ZONE_DMA32. + * + * - parisc uses neither. */ +#ifdef CONFIG_ZONE_DMA ZONE_DMA, #endif #ifdef CONFIG_ZONE_DMA32 - /* - * x86_64 needs two ZONE_DMAs because it supports devices that are - * only able to do DMA to the lower 16M but also 32 bit devices that - * can only do DMA areas below 4G. - */ ZONE_DMA32, #endif /* -- cgit v1.2.3 From 9655ac4aca20d654769b8f0d6a5be34b7ce7bad1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:24 +0200 Subject: debugfs: remove return value of debugfs_create_u8() No one checks the return value of debugfs_create_u8(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20191011132931.1186197-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 58424eb3b329..8e071f599245 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -97,8 +97,8 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); -struct dentry *debugfs_create_u8(const char *name, umode_t mode, - struct dentry *parent, u8 *value); +void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, + u8 *value); struct dentry *debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value); struct dentry *debugfs_create_u32(const char *name, umode_t mode, @@ -244,12 +244,8 @@ static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentr return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_u8(const char *name, umode_t mode, - struct dentry *parent, - u8 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_u8(const char *name, umode_t mode, + struct dentry *parent, u8 *value) { } static inline struct dentry *debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, -- cgit v1.2.3 From 313f5dbba41d905d59c820bb2d91ee6c661aff99 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:25 +0200 Subject: debugfs: remove return value of debugfs_create_u16() No one checks the return value of debugfs_create_u16(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20191011132931.1186197-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 8e071f599245..c83a33a76b6c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -99,8 +99,8 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); -struct dentry *debugfs_create_u16(const char *name, umode_t mode, - struct dentry *parent, u16 *value); +void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, + u16 *value); struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value); struct dentry *debugfs_create_u64(const char *name, umode_t mode, @@ -247,12 +247,8 @@ static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentr static inline void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { } -static inline struct dentry *debugfs_create_u16(const char *name, umode_t mode, - struct dentry *parent, - u16 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_u16(const char *name, umode_t mode, + struct dentry *parent, u16 *value) { } static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, -- cgit v1.2.3 From ad26221fb9e64e69e32a2caf58dba067ca4e815e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:26 +0200 Subject: debugfs: remove return value of debugfs_create_u64() No one checks the return value of debugfs_create_u64(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20191011132931.1186197-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c83a33a76b6c..b3bed4d61733 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -103,8 +103,8 @@ void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value); struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value); -struct dentry *debugfs_create_u64(const char *name, umode_t mode, - struct dentry *parent, u64 *value); +void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, + u64 *value); struct dentry *debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value); struct dentry *debugfs_create_x8(const char *name, umode_t mode, @@ -257,12 +257,8 @@ static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_u64(const char *name, umode_t mode, - struct dentry *parent, - u64 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_u64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) { } static inline struct dentry *debugfs_create_ulong(const char *name, umode_t mode, -- cgit v1.2.3 From 8e5802635f0f9f7329ec8ffdec15479946c99fb1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:27 +0200 Subject: debugfs: remove return value of debugfs_create_size_t() No one checks the return value of debugfs_create_size_t(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20191011132931.1186197-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index b3bed4d61733..1d859bc657bd 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -115,8 +115,8 @@ struct dentry *debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value); struct dentry *debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value); -struct dentry *debugfs_create_size_t(const char *name, umode_t mode, - struct dentry *parent, size_t *value); +void debugfs_create_size_t(const char *name, umode_t mode, + struct dentry *parent, size_t *value); struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, @@ -296,12 +296,9 @@ static inline struct dentry *debugfs_create_x64(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_size_t(const char *name, umode_t mode, - struct dentry *parent, - size_t *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_size_t(const char *name, umode_t mode, + struct dentry *parent, size_t *value) +{ } static inline struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value) -- cgit v1.2.3 From c9c13ba428ef90a9b408a6cdf874e14ab5754516 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Sat, 28 Sep 2019 02:43:08 +0300 Subject: PCI: Add PCI_STD_NUM_BARS for the number of standard BARs Code that iterates over all standard PCI BARs typically uses PCI_STD_RESOURCE_END. However, that requires the unusual test "i <= PCI_STD_RESOURCE_END" rather than something the typical "i < PCI_STD_NUM_BARS". Add a definition for PCI_STD_NUM_BARS and change loops to use the more idiomatic C style to help avoid fencepost errors. Link: https://lore.kernel.org/r/20190927234026.23342-1-efremov@linux.com Link: https://lore.kernel.org/r/20190927234308.23935-1-efremov@linux.com Link: https://lore.kernel.org/r/20190916204158.6889-3-efremov@linux.com Signed-off-by: Denis Efremov Signed-off-by: Bjorn Helgaas Acked-by: Sebastian Ott # arch/s390/ Acked-by: Bartlomiej Zolnierkiewicz # video/fbdev/ Acked-by: Gustavo Pimentel # pci/controller/dwc/ Acked-by: Jack Wang # scsi/pm8001/ Acked-by: Martin K. Petersen # scsi/pm8001/ Acked-by: Ulf Hansson # memstick/ --- include/linux/pci-epc.h | 2 +- include/linux/pci.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index f641badc2c61..56f1846b9d39 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -117,7 +117,7 @@ struct pci_epc_features { unsigned int msix_capable : 1; u8 reserved_bar; u8 bar_fixed_64bit; - u64 bar_fixed_size[BAR_5 + 1]; + u64 bar_fixed_size[PCI_STD_NUM_BARS]; size_t align; }; diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..4cc739616148 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -82,7 +82,7 @@ enum pci_mmap_state { enum { /* #0-5: standard PCI resources */ PCI_STD_RESOURCES, - PCI_STD_RESOURCE_END = 5, + PCI_STD_RESOURCE_END = PCI_STD_RESOURCES + PCI_STD_NUM_BARS - 1, /* #6: expansion ROM resource */ PCI_ROM_RESOURCE, -- cgit v1.2.3 From 9e420d7f125f51ab1eda37497b08c4fad9efe4a8 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 11 Oct 2019 17:05:43 +0200 Subject: rts: ds1685: remove not needed fields from private struct A few of the fields in struct ds1685_priv aren't needed at all, so we can remove it. Signed-off-by: Thomas Bogendoerfer Acked-by: Joshua Kinard Link: https://lore.kernel.org/r/20191011150546.9186-1-tbogendoerfer@suse.de Signed-off-by: Alexandre Belloni --- include/linux/rtc/ds1685.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 43aec568ba7c..b9671d00d964 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -43,13 +43,10 @@ struct ds1685_priv { struct rtc_device *dev; void __iomem *regs; u32 regstep; - resource_size_t baseaddr; size_t size; int irq_num; bool bcd_mode; bool no_irq; - bool uie_unsupported; - bool alloc_io_resources; u8 (*read)(struct ds1685_priv *, int); void (*write)(struct ds1685_priv *, int, u8); void (*prepare_poweroff)(void); -- cgit v1.2.3 From af818031f4637b0e8d106fcc9023f1c22c44e13a Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Fri, 11 Oct 2019 17:05:44 +0200 Subject: rtc: ds1685: use devm_platform_ioremap_resource helper Simplify ioremapping of registers by using devm_platform_ioremap_resource. Signed-off-by: Thomas Bogendoerfer Acked-by: Joshua Kinard Link: https://lore.kernel.org/r/20191011150546.9186-2-tbogendoerfer@suse.de Signed-off-by: Alexandre Belloni --- include/linux/rtc/ds1685.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index b9671d00d964..101c7adc05a2 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -43,7 +43,6 @@ struct ds1685_priv { struct rtc_device *dev; void __iomem *regs; u32 regstep; - size_t size; int irq_num; bool bcd_mode; bool no_irq; -- cgit v1.2.3 From 781ca2de89bae1b1d2c96df9ef33e9a324415995 Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Sun, 8 Sep 2019 09:56:38 -0700 Subject: iommu: Add gfp parameter to iommu_ops::map Add a gfp_t parameter to the iommu_ops::map function. Remove the needless locking in the AMD iommu driver. The iommu_ops::map function (or the iommu_map function which calls it) was always supposed to be sleepable (according to Joerg's comment in this thread: https://lore.kernel.org/patchwork/patch/977520/ ) and so should probably have had a "might_sleep()" since it was written. However currently the dma-iommu api can call iommu_map in an atomic context, which it shouldn't do. This doesn't cause any problems because any iommu driver which uses the dma-iommu api uses gfp_atomic in it's iommu_ops::map function. But doing this wastes the memory allocators atomic pools. Signed-off-by: Tom Murphy Reviewed-by: Robin Murphy Reviewed-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 29bac5345563..6ca3fb2873d7 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -256,7 +256,7 @@ struct iommu_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); @@ -421,6 +421,8 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); +extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, @@ -428,6 +430,9 @@ extern size_t iommu_unmap_fast(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg,unsigned int nents, int prot); +extern size_t iommu_map_sg_atomic(struct iommu_domain *domain, + unsigned long iova, struct scatterlist *sg, + unsigned int nents, int prot); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -662,6 +667,13 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } +static inline int iommu_map_atomic(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + return -ENODEV; +} + static inline size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { @@ -682,6 +694,13 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, return 0; } +static inline size_t iommu_map_sg_atomic(struct iommu_domain *domain, + unsigned long iova, struct scatterlist *sg, + unsigned int nents, int prot) +{ + return 0; +} + static inline void iommu_flush_tlb_all(struct iommu_domain *domain) { } -- cgit v1.2.3 From 35009c807488ccd5a01cbf102033695e52794b68 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 1 Oct 2019 17:27:22 +0300 Subject: ACPI / utils: Introduce acpi_dev_hid_uid_match() helper There are users outside of ACPI realm which reimplementing the comparator function to check if the given device matches to given HID and UID. For better utilization, introduce a helper for everyone to use. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 8b4e516bac00..0f37a7d5fa77 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -678,6 +678,14 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) return false; } +struct acpi_device; + +static inline bool +acpi_dev_hid_uid_match(struct acpi_device *adev, const char *hid2, const char *uid2) +{ + return false; +} + static inline struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) { -- cgit v1.2.3 From b2c98153f45fc17b9fcb241000f2d131ddea6030 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:30 +0300 Subject: spi: introduce spi_delay struct as "value + unit" & spi_delay_exec() There are plenty of delays that have been introduced in SPI core. Most of them are in micro-seconds, some need to be in nano-seconds, and some in clock-cycles. For some of these delays (related to transfers & CS timing) it may make sense to have a `spi_delay` struct that abstracts these a bit. The important element of these delays [for unification] seems to be the `unit` of the delay. It looks like micro-seconds is good enough for most people, but every-once in a while, some delays seem to require other units of measurement. This change adds the `spi_delay` struct & a `spi_delay_exec()` function that processes a `spi_delay` object/struct to execute the delay. It's a copy of the `cs_change_delay` mechanism, but without the default for 10 uS. The clock-cycle delay unit is a bit special, as it needs to be bound to an `spi_transfer` object to execute. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-3-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 27f6b046cf92..8f643de3197b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -90,6 +90,21 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, #define SPI_STATISTICS_INCREMENT_FIELD(stats, field) \ SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1) +/** + * struct spi_delay - SPI delay information + * @value: Value for the delay + * @unit: Unit for the delay + */ +struct spi_delay { +#define SPI_DELAY_UNIT_USECS 0 +#define SPI_DELAY_UNIT_NSECS 1 +#define SPI_DELAY_UNIT_SCK 2 + u16 value; + u8 unit; +}; + +extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); + /** * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. @@ -887,9 +902,6 @@ struct spi_transfer { u16 delay_usecs; u16 cs_change_delay; u8 cs_change_delay_unit; -#define SPI_DELAY_UNIT_USECS 0 -#define SPI_DELAY_UNIT_NSECS 1 -#define SPI_DELAY_UNIT_SCK 2 u32 speed_hz; u16 word_delay; -- cgit v1.2.3 From 329f0dac4cad9fa4b1439a88180d91bcb5c4eaf8 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:31 +0300 Subject: spi: make `cs_change_delay` the first user of the `spi_delay` logic Since the logic for `spi_delay` struct + `spi_delay_exec()` has been copied from the `cs_change_delay` logic, it's natural to make this delay, the first user. The `cs_change_delay` logic requires that the default remain 10 uS, in case it is unspecified/unconfigured. So, there is some special handling needed to do that. The ADIS library is one of the few users of the new `cs_change_delay` parameter for an spi_transfer. The introduction of the `spi_delay` struct, requires that the users of of `cs_change_delay` get an update. This change also updates the ADIS library. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-4-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 8f643de3197b..7670be934643 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -778,7 +778,6 @@ extern void spi_res_release(struct spi_controller *ctlr, * @cs_change: affects chipselect after this transfer completes * @cs_change_delay: delay between cs deassert and assert when * @cs_change is set and @spi_transfer is not the last in @spi_message - * @cs_change_delay_unit: unit of cs_change_delay * @delay_usecs: microseconds to delay after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. @@ -900,8 +899,7 @@ struct spi_transfer { u8 bits_per_word; u8 word_delay_usecs; u16 delay_usecs; - u16 cs_change_delay; - u8 cs_change_delay_unit; + struct spi_delay cs_change_delay; u32 speed_hz; u16 word_delay; -- cgit v1.2.3 From 84593a131c3af21d686d05c4b4432290a415d399 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:32 +0300 Subject: spi: sprd: convert transfer word delay to spi_delay struct The Spreadtrum SPI driver is the only user of the `word_delay` field in the `spi_transfer` struct. This change converts the field to use the `spi_delay` struct. This also enforces the users to specify the delay unit to be `SPI_DELAY_UNIT_SCK`. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-5-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7670be934643..6cb67ad53ffa 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -783,7 +783,7 @@ extern void spi_res_release(struct spi_controller *ctlr, * the next transfer or completing this @spi_message. * @word_delay_usecs: microseconds to inter word delay after each word size * (set by bits_per_word) transmission. - * @word_delay: clock cycles to inter word delay after each word size + * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to * transfer this transfer. Set to 0 if the spi bus driver does @@ -900,8 +900,8 @@ struct spi_transfer { u8 word_delay_usecs; u16 delay_usecs; struct spi_delay cs_change_delay; + struct spi_delay word_delay; u32 speed_hz; - u16 word_delay; u32 effective_speed_hz; -- cgit v1.2.3 From 6c613f68aabf33385c01e949204ac5ed30887161 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:35 +0300 Subject: spi: core,atmel: convert `word_delay_usecs` -> `word_delay` for spi_device This change does a conversion from the `word_delay_usecs` -> `word_delay` for the `spi_device` struct. This allows users to specify inter-word delays in other unit types (nano-seconds or clock cycles), depending on how users want. The Atmel SPI driver is the only current user of the `word_delay_usecs` field (from the `spi_device` struct). So, it needed a slight conversion to use the `word_delay` as an `spi_delay` struct. In SPI core, the only required mechanism is to update the `word_delay` information per `spi_transfer`. This requires a bit more logic than before, because it needs that both delays be converted to a common unit (nano-seconds) for comparison. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-8-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6cb67ad53ffa..ebeb272aeb0f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -139,7 +139,7 @@ extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); * the spi_master. * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when * not using a GPIO line) - * @word_delay_usecs: microsecond delay to be inserted between consecutive + * @word_delay: delay to be inserted between consecutive * words of a transfer * * @statistics: statistics for the spi_device @@ -189,7 +189,7 @@ struct spi_device { const char *driver_override; int cs_gpio; /* LEGACY: chip select gpio */ struct gpio_desc *cs_gpiod; /* chip select gpio desc */ - uint8_t word_delay_usecs; /* inter-word delay */ + struct spi_delay word_delay; /* inter-word delay */ /* the statistics */ struct spi_statistics statistics; @@ -781,8 +781,6 @@ extern void spi_res_release(struct spi_controller *ctlr, * @delay_usecs: microseconds to delay after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. - * @word_delay_usecs: microseconds to inter word delay after each word size - * (set by bits_per_word) transmission. * @word_delay: inter word delay to be introduced after each word size * (set by bits_per_word) transmission. * @effective_speed_hz: the effective SCK-speed that was used to @@ -897,7 +895,6 @@ struct spi_transfer { #define SPI_NBITS_DUAL 0x02 /* 2bits transfer */ #define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ u8 bits_per_word; - u8 word_delay_usecs; u16 delay_usecs; struct spi_delay cs_change_delay; struct spi_delay word_delay; -- cgit v1.2.3 From bebcfd272df648542c458d28fbd6a8f9428b5310 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:36 +0300 Subject: spi: introduce `delay` field for `spi_transfer` + spi_transfer_delay_exec() The change introduces the `delay` field to the `spi_transfer` struct as an `struct spi_delay` type. This intends to eventually replace `delay_usecs`. But, since there are many users of `delay_usecs`, this needs some intermediate work. A helper called `spi_transfer_delay_exec()` is also added, which maintains backwards compatibility with `delay_usecs`, by assigning the value to `delay` if non-zero. This should maintain backwards compatibility with current users of `udelay_usecs`. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-9-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ebeb272aeb0f..fe5b85df2c79 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -778,6 +778,9 @@ extern void spi_res_release(struct spi_controller *ctlr, * @cs_change: affects chipselect after this transfer completes * @cs_change_delay: delay between cs deassert and assert when * @cs_change is set and @spi_transfer is not the last in @spi_message + * @delay: delay to be introduced after this transfer before + * (optionally) changing the chipselect status, then starting + * the next transfer or completing this @spi_message. * @delay_usecs: microseconds to delay after this transfer before * (optionally) changing the chipselect status, then starting * the next transfer or completing this @spi_message. @@ -896,6 +899,7 @@ struct spi_transfer { #define SPI_NBITS_QUAD 0x04 /* 4bits transfer */ u8 bits_per_word; u16 delay_usecs; + struct spi_delay delay; struct spi_delay cs_change_delay; struct spi_delay word_delay; u32 speed_hz; @@ -1003,6 +1007,20 @@ spi_transfer_del(struct spi_transfer *t) list_del(&t->transfer_list); } +static inline int +spi_transfer_delay_exec(struct spi_transfer *t) +{ + struct spi_delay d; + + if (t->delay_usecs) { + d.value = t->delay_usecs; + d.unit = SPI_DELAY_UNIT_USECS; + return spi_delay_exec(&d, NULL); + } + + return spi_delay_exec(&t->delay, t); +} + /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized -- cgit v1.2.3 From 8105936684681195d9073880b06a123b2e316811 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:42 +0300 Subject: spi: tegra114: change format for `spi_set_cs_timing()` function The initial version of `spi_set_cs_timing()` was implemented with consideration only for clock-cycles as delay. For cases like `CS setup` time, it's sometimes needed that micro-seconds (or nano-seconds) are required, or sometimes even longer delays, for cases where the device needs a little longer to start transferring that after CS is asserted. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-15-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index fe5b85df2c79..f9b4ba2db08d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -524,8 +524,8 @@ struct spi_controller { * to configure specific CS timing through spi_set_cs_timing() after * spi_setup(). */ - void (*set_cs_timing)(struct spi_device *spi, u8 setup_clk_cycles, - u8 hold_clk_cycles, u8 inactive_clk_cycles); + int (*set_cs_timing)(struct spi_device *spi, struct spi_delay *setup, + struct spi_delay *hold, struct spi_delay *inactive); /* bidirectional bulk transfers * @@ -1068,7 +1068,10 @@ static inline void spi_message_free(struct spi_message *m) kfree(m); } -extern void spi_set_cs_timing(struct spi_device *spi, u8 setup, u8 hold, u8 inactive_dly); +extern int spi_set_cs_timing(struct spi_device *spi, + struct spi_delay *setup, + struct spi_delay *hold, + struct spi_delay *inactive); extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -- cgit v1.2.3 From 25093bdeb6bcae728e12e3795261dbd3677060a9 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:43 +0300 Subject: spi: implement SW control for CS times This change implements CS control for setup, hold & inactive delays. The `cs_setup` delay is completely new, and can help with cases where asserting the CS, also brings the device out of power-sleep, where there needs to be a longer (than usual), before transferring data. The `cs_hold` time can overlap with the `delay` (or `delay_usecs`) from an SPI transfer. The main difference is that `cs_hold` implies that CS will be de-asserted. The `cs_inactive` delay does not have a clear use-case yet. It has been implemented mostly because the `spi_set_cs_timing()` function implements it. To some degree, this could overlap or replace `cs_change_delay`, but this will require more consideration/investigation in the future. All these delays have been added to the `spi_controller` struct, as they would typically be configured by calling `spi_set_cs_timing()` after an `spi_setup()` call. Software-mode for CS control, implies that the `set_cs_timing()` hook has not been provided for the `spi_controller` object. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-16-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index f9b4ba2db08d..cfd87b18f077 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -609,6 +609,11 @@ struct spi_controller { /* Optimized handlers for SPI memory-like operations. */ const struct spi_controller_mem_ops *mem_ops; + /* CS delays */ + struct spi_delay cs_setup; + struct spi_delay cs_hold; + struct spi_delay cs_inactive; + /* gpio chip select */ int *cs_gpios; struct gpio_desc **cs_gpiods; -- cgit v1.2.3 From 3984d39b0e41ac4de8b4530ae3911ccf52ed4bbf Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 26 Sep 2019 13:51:44 +0300 Subject: spi: spi-fsl-espi: convert transfer delay to `spi_delay` format The way the max delay is computed for this controller, it looks like it is searching for the max delay from an SPI message a using that. No idea if this is valid. But this change should support both `delay_usecs` and the new `delay` data which is of `spi_delay` type. Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20190926105147.7839-17-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index cfd87b18f077..c40d6af2bf07 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -103,6 +103,7 @@ struct spi_delay { u8 unit; }; +extern int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer); extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer); /** -- cgit v1.2.3 From f226650494c6aa87526d12135b7de8b8c074f3de Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2019 10:06:12 +0100 Subject: arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear The GICv3 architecture specification is incredibly misleading when it comes to PMR and the requirement for a DSB. It turns out that this DSB is only required if the CPU interface sends an Upstream Control message to the redistributor in order to update the RD's view of PMR. This message is only sent when ICC_CTLR_EL1.PMHE is set, which isn't the case in Linux. It can still be set from EL3, so some special care is required. But the upshot is that in the (hopefuly large) majority of the cases, we can drop the DSB altogether. This relies on a new static key being set if the boot CPU has PMHE set. The drawback is that this static key has to be exported to modules. Cc: Will Deacon Cc: James Morse Cc: Julien Thierry Cc: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 5cc10cf7cb3e..a0bde9e12efa 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -487,6 +487,8 @@ #define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) #define ICC_CTLR_EL1_CBPR_SHIFT 0 #define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) #define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 #define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) #define ICC_CTLR_EL1_ID_BITS_SHIFT 11 -- cgit v1.2.3 From 4c7c171f85b261f91270d405b7c7390aa6ddfb60 Mon Sep 17 00:00:00 2001 From: Yi L Liu Date: Wed, 2 Oct 2019 12:42:40 -0700 Subject: iommu: Introduce cache_invalidate API In any virtualization use case, when the first translation stage is "owned" by the guest OS, the host IOMMU driver has no knowledge of caching structure updates unless the guest invalidation activities are trapped by the virtualizer and passed down to the host. Since the invalidation data can be obtained from user space and will be written into physical IOMMU, we must allow security check at various layers. Therefore, generic invalidation data format are proposed here, model specific IOMMU drivers need to convert them into their own format. Signed-off-by: Yi L Liu Signed-off-by: Jacob Pan Signed-off-by: Ashok Raj Signed-off-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Reviewed-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 29bac5345563..9b22055e6f85 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -244,6 +244,7 @@ struct iommu_iotlb_gather { * @sva_unbind: Unbind process address space from device * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response + * @cache_invalidate: invalidate translation caches * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { @@ -306,6 +307,8 @@ struct iommu_ops { int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); + int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, + struct iommu_cache_invalidate_info *inv_info); unsigned long pgsize_bitmap; }; @@ -417,6 +420,9 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); +extern int iommu_cache_invalidate(struct iommu_domain *domain, + struct device *dev, + struct iommu_cache_invalidate_info *inv_info); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -1005,6 +1011,14 @@ static inline int iommu_sva_get_pasid(struct iommu_sva *handle) return IOMMU_PASID_INVALID; } +static inline int +iommu_cache_invalidate(struct iommu_domain *domain, + struct device *dev, + struct iommu_cache_invalidate_info *inv_info) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */ #ifdef CONFIG_IOMMU_DEBUGFS -- cgit v1.2.3 From fa83433c92e340822a056a610a4fa2063a3db304 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 2 Oct 2019 12:42:41 -0700 Subject: iommu: Add I/O ASID allocator Some devices might support multiple DMA address spaces, in particular those that have the PCI PASID feature. PASID (Process Address Space ID) allows to share process address spaces with devices (SVA), partition a device into VM-assignable entities (VFIO mdev) or simply provide multiple DMA address space to kernel drivers. Add a global PASID allocator usable by different drivers at the same time. Name it I/O ASID to avoid confusion with ASIDs allocated by arch code, which are usually a separate ID space. The IOASID space is global. Each device can have its own PASID space, but by convention the IOMMU ended up having a global PASID space, so that with SVA, each mm_struct is associated to a single PASID. The allocator is primarily used by IOMMU subsystem but in rare occasions drivers would like to allocate PASIDs for devices that aren't managed by an IOMMU, using the same ID space as IOMMU. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Jacob Pan Reviewed-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Signed-off-by: Joerg Roedel --- include/linux/ioasid.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/linux/ioasid.h (limited to 'include/linux') diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h new file mode 100644 index 000000000000..17337a13f06e --- /dev/null +++ b/include/linux/ioasid.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_IOASID_H +#define __LINUX_IOASID_H + +#include +#include + +#define INVALID_IOASID ((ioasid_t)-1) +typedef unsigned int ioasid_t; + +struct ioasid_set { + int dummy; +}; + +#define DECLARE_IOASID_SET(name) struct ioasid_set name = { 0 } + +#if IS_ENABLED(CONFIG_IOASID) +ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, + void *private); +void ioasid_free(ioasid_t ioasid); +void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, + bool (*getter)(void *)); +int ioasid_set_data(ioasid_t ioasid, void *data); + +#else /* !CONFIG_IOASID */ +static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, + ioasid_t max, void *private) +{ + return INVALID_IOASID; +} + +static inline void ioasid_free(ioasid_t ioasid) +{ +} + +static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, + bool (*getter)(void *)) +{ + return NULL; +} + +static inline int ioasid_set_data(ioasid_t ioasid, void *data) +{ + return -ENOTSUPP; +} + +#endif /* CONFIG_IOASID */ +#endif /* __LINUX_IOASID_H */ -- cgit v1.2.3 From e5c0bd7f2206cd288029edb6afbfde93c73b4048 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 2 Oct 2019 12:42:42 -0700 Subject: iommu/ioasid: Add custom allocators IOASID allocation may rely on platform specific methods. One use case is that when running in the guest, in order to obtain system wide global IOASIDs, emulated allocation interface is needed to communicate with the host. Here we call these platform specific allocators custom allocators. Custom IOASID allocators can be registered at runtime and take precedence over the default XArray allocator. They have these attributes: - provides platform specific alloc()/free() functions with private data. - allocation results lookup are not provided by the allocator, lookup request must be done by the IOASID framework by its own XArray. - allocators can be unregistered at runtime, either fallback to the next custom allocator or to the default allocator. - custom allocators can share the same set of alloc()/free() helpers, in this case they also share the same IOASID space, thus the same XArray. - switching between allocators requires all outstanding IOASIDs to be freed unless the two allocators share the same alloc()/free() helpers. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Jacob Pan Link: https://lkml.org/lkml/2019/4/26/462 Reviewed-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Signed-off-by: Joerg Roedel --- include/linux/ioasid.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 17337a13f06e..6f000d7a0ddc 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -7,11 +7,28 @@ #define INVALID_IOASID ((ioasid_t)-1) typedef unsigned int ioasid_t; +typedef ioasid_t (*ioasid_alloc_fn_t)(ioasid_t min, ioasid_t max, void *data); +typedef void (*ioasid_free_fn_t)(ioasid_t ioasid, void *data); struct ioasid_set { int dummy; }; +/** + * struct ioasid_allocator_ops - IOASID allocator helper functions and data + * + * @alloc: helper function to allocate IOASID + * @free: helper function to free IOASID + * @list: for tracking ops that share helper functions but not data + * @pdata: data belong to the allocator, provided when calling alloc() + */ +struct ioasid_allocator_ops { + ioasid_alloc_fn_t alloc; + ioasid_free_fn_t free; + struct list_head list; + void *pdata; +}; + #define DECLARE_IOASID_SET(name) struct ioasid_set name = { 0 } #if IS_ENABLED(CONFIG_IOASID) @@ -20,6 +37,8 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); +int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); +void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); #else /* !CONFIG_IOASID */ @@ -39,6 +58,15 @@ static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, return NULL; } +static inline int ioasid_register_allocator(struct ioasid_allocator_ops *allocator) +{ + return -ENOTSUPP; +} + +static inline void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator) +{ +} + static inline int ioasid_set_data(ioasid_t ioasid, void *data) { return -ENOTSUPP; -- cgit v1.2.3 From 808be0aae53a3675337fad9cde616e086bdc8287 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Wed, 2 Oct 2019 12:42:43 -0700 Subject: iommu: Introduce guest PASID bind function Guest shared virtual address (SVA) may require host to shadow guest PASID tables. Guest PASID can also be allocated from the host via enlightened interfaces. In this case, guest needs to bind the guest mm, i.e. cr3 in guest physical address to the actual PASID table in the host IOMMU. Nesting will be turned on such that guest virtual address can go through a two level translation: - 1st level translates GVA to GPA - 2nd level translates GPA to HPA This patch introduces APIs to bind guest PASID data to the assigned device entry in the physical IOMMU. See the diagram below for usage explanation. .-------------. .---------------------------. | vIOMMU | | Guest process mm, FL only | | | '---------------------------' .----------------/ | PASID Entry |--- PASID cache flush - '-------------' | | | V | | GP '-------------' Guest ------| Shadow |----------------------- GP->HP* --------- v v | Host v .-------------. .----------------------. | pIOMMU | | Bind FL for GVA-GPA | | | '----------------------' .----------------/ | | PASID Entry | V (Nested xlate) '----------------\.---------------------. | | |Set SL to GPA-HPA | | | '---------------------' '-------------' Where: - FL = First level/stage one page tables - SL = Second level/stage two page tables - GP = Guest PASID - HP = Host PASID * Conversion needed if non-identity GP-HP mapping option is chosen. Signed-off-by: Jacob Pan Signed-off-by: Liu Yi L Reviewed-by: Jean-Philippe Brucker Reviewed-by: Jean-Philippe Brucker Reviewed-by: Eric Auger Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9b22055e6f85..f8959f759e41 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #define IOMMU_READ (1 << 0) @@ -246,6 +247,8 @@ struct iommu_iotlb_gather { * @page_response: handle page request response * @cache_invalidate: invalidate translation caches * @pgsize_bitmap: bitmap of all possible supported page sizes + * @sva_bind_gpasid: bind guest pasid and mm + * @sva_unbind_gpasid: unbind guest pasid and mm */ struct iommu_ops { bool (*capable)(enum iommu_cap); @@ -309,6 +312,10 @@ struct iommu_ops { struct iommu_page_response *msg); int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev, struct iommu_cache_invalidate_info *inv_info); + int (*sva_bind_gpasid)(struct iommu_domain *domain, + struct device *dev, struct iommu_gpasid_bind_data *data); + + int (*sva_unbind_gpasid)(struct device *dev, int pasid); unsigned long pgsize_bitmap; }; @@ -423,6 +430,10 @@ extern void iommu_detach_device(struct iommu_domain *domain, extern int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev, struct iommu_cache_invalidate_info *inv_info); +extern int iommu_sva_bind_gpasid(struct iommu_domain *domain, + struct device *dev, struct iommu_gpasid_bind_data *data); +extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, @@ -1018,6 +1029,17 @@ iommu_cache_invalidate(struct iommu_domain *domain, { return -ENODEV; } +static inline int iommu_sva_bind_gpasid(struct iommu_domain *domain, + struct device *dev, struct iommu_gpasid_bind_data *data) +{ + return -ENODEV; +} + +static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain, + struct device *dev, int pasid) +{ + return -ENODEV; +} #endif /* CONFIG_IOMMU_API */ -- cgit v1.2.3 From 13ef954445df4fd1d7c003a500ec5ce49573e14b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Oct 2019 08:43:42 -0700 Subject: iomap: Allow forcing of waiting for running DIO in iomap_dio_rw() Filesystems do not support doing IO as asynchronous in some cases. For example in case of unaligned writes or in case file size needs to be extended (e.g. for ext4). Instead of forcing filesystem to wait for AIO in such cases, add argument to iomap_dio_rw() which makes the function wait for IO completion. This also results in executing iomap_dio_complete() inline in iomap_dio_rw() providing its return value to the caller as for ordinary sync IO. Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 7aa5d6117936..76b14cb729dc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -195,7 +195,8 @@ struct iomap_dio_ops { }; ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, const struct iomap_dio_ops *dops); + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + bool wait_for_completion); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP -- cgit v1.2.3 From aff5d0552da4055da3faa27ee4252e48bb1f5821 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 5 Oct 2019 14:04:36 +0200 Subject: PCI/ASPM: Add L1 PM substate support to pci_disable_link_state() Add support for disabling states L1.1 and L1.2 to pci_disable_link_state(). Allow separate control of ASPM and PCI PM L1 substates. Link: https://lore.kernel.org/r/d81f8036-c236-6463-48e7-ebcdcda85bba@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..9dc5bee14ae9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1544,9 +1544,13 @@ extern bool pcie_ports_native; #define pcie_ports_native false #endif -#define PCIE_LINK_STATE_L0S 1 -#define PCIE_LINK_STATE_L1 2 -#define PCIE_LINK_STATE_CLKPM 4 +#define PCIE_LINK_STATE_L0S BIT(0) +#define PCIE_LINK_STATE_L1 BIT(1) +#define PCIE_LINK_STATE_CLKPM BIT(2) +#define PCIE_LINK_STATE_L1_1 BIT(3) +#define PCIE_LINK_STATE_L1_2 BIT(4) +#define PCIE_LINK_STATE_L1_1_PCIPM BIT(5) +#define PCIE_LINK_STATE_L1_2_PCIPM BIT(6) #ifdef CONFIG_PCIEASPM int pci_disable_link_state(struct pci_dev *pdev, int state); -- cgit v1.2.3 From b7a73b33bb39575848df66c4bff09cd281652882 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 6 Oct 2019 15:22:04 +0200 Subject: iio: imu: st_lsm6dsx: add wakeup_source in st_sensors_platform_data Add the possibility to enable/disable wakeup source through st_sensors_platform_data and not only through device tree Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- include/linux/platform_data/st_sensors_pdata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 30929c22227d..e40b28ca892e 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -18,12 +18,14 @@ * @open_drain: set the interrupt line to be open drain if possible. * @spi_3wire: enable spi-3wire mode. * @pullups: enable/disable i2c controller pullup resistors. + * @wakeup_source: enable/disable device as wakeup generator. */ struct st_sensors_platform_data { u8 drdy_int_pin; bool open_drain; bool spi_3wire; bool pullups; + bool wakeup_source; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From 8cbb8a9374a271099bacdc890fb16d374261332b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 9 Oct 2019 14:54:01 -0500 Subject: PCI/ATS: Move pci_prg_resp_pasid_required() to CONFIG_PCI_PRI pci_prg_resp_pasid_required() returns the value of the "PRG Response PASID Required" bit from the PRI capability, but the interface was previously defined under #ifdef CONFIG_PCI_PASID. Move it from CONFIG_PCI_PASID to CONFIG_PCI_PRI so it's with the other PRI-related things. Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Joerg Roedel --- include/linux/pci-ats.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 1ebb88e7c184..a7a2b3d94fcc 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -10,6 +10,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs); void pci_disable_pri(struct pci_dev *pdev); void pci_restore_pri_state(struct pci_dev *pdev); int pci_reset_pri(struct pci_dev *pdev); +int pci_prg_resp_pasid_required(struct pci_dev *pdev); #else /* CONFIG_PCI_PRI */ @@ -31,6 +32,10 @@ static inline int pci_reset_pri(struct pci_dev *pdev) return -ENODEV; } +static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev) +{ + return 0; +} #endif /* CONFIG_PCI_PRI */ #ifdef CONFIG_PCI_PASID @@ -40,7 +45,6 @@ void pci_disable_pasid(struct pci_dev *pdev); void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); -int pci_prg_resp_pasid_required(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ @@ -66,11 +70,6 @@ static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } - -static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev) -{ - return 0; -} #endif /* CONFIG_PCI_PASID */ -- cgit v1.2.3 From 3ad62192097443e8c3a8e244475bacaecb894d4e Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Thu, 5 Sep 2019 14:31:44 -0500 Subject: PCI/ATS: Disable PF/VF ATS service independently Previously we didn't disable the PF ATS until all associated VFs had disabled it. But per PCIe spec r5.0, sec 9.3.7.8, the ATS Capability in VFs and associated PFs may be enabled independently. Leaving ATS enabled in the PF unnecessarily may have power and performance impacts. Remove this dependency logic in the ATS enable/disable code. [bhelgaas: commit log] Suggested-by: Ashok Raj Link: https://lore.kernel.org/r/8163ab8fa66afd2cba514ae95d29ab12104781aa.1567029860.git.sathyanarayanan.kuppuswamy@linux.intel.com Link: https://lore.kernel.org/r/20190905193146.90250-4-helgaas@kernel.org Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas Cc: Ashok Raj Cc: Keith Busch --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..c028883c8460 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -452,7 +452,6 @@ struct pci_dev { }; u16 ats_cap; /* ATS Capability offset */ u8 ats_stu; /* ATS Smallest Translation Unit */ - atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ #endif #ifdef CONFIG_PCI_PRI u32 pri_reqs_alloc; /* Number of PRI requests allocated */ -- cgit v1.2.3 From c065190bbcd4fb54ce9c5fd34fcad71acf2a0ea4 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Thu, 5 Sep 2019 14:31:45 -0500 Subject: PCI/ATS: Cache PRI Capability offset Previously each PRI interface searched for the PRI Capability. Cache the capability offset the first time we use it instead of searching each time. [bhelgaas: commit log, reorder patch to later, call pci_pri_init() from pci_init_capabilities()] Link: https://lore.kernel.org/r/0c5495d376faf6dbb8eb2165204c474438aaae65.156 7029860.git.sathyanarayanan.kuppuswamy@linux.intel.com Link: https://lore.kernel.org/r/20190905193146.90250-5-helgaas@kernel.org Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c028883c8460..e7770d990c46 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -454,6 +454,7 @@ struct pci_dev { u8 ats_stu; /* ATS Smallest Translation Unit */ #endif #ifdef CONFIG_PCI_PRI + u16 pri_cap; /* PRI Capability offset */ u32 pri_reqs_alloc; /* Number of PRI requests allocated */ #endif #ifdef CONFIG_PCI_PASID -- cgit v1.2.3 From 751035b8dc061ae434c3311bac9cd6d0e5e00f94 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Thu, 5 Sep 2019 14:31:46 -0500 Subject: PCI/ATS: Cache PASID Capability offset Previously each PASID interface searched for the PASID Capability. Cache the capability offset the first time we use it instead of searching each time. [bhelgaas: commit log, reorder patch to later, call pci_pasid_init() from pci_init_capabilities()] Link: https://lore.kernel.org/r/4957778959fa34eab3e8b3065d1951989c61cb0f.1567029860.git.sathyanarayanan.kuppuswamy@linux.intel.com Link: https://lore.kernel.org/r/20190905193146.90250-6-helgaas@kernel.org Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e7770d990c46..6542100bd2dd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -458,6 +458,7 @@ struct pci_dev { u32 pri_reqs_alloc; /* Number of PRI requests allocated */ #endif #ifdef CONFIG_PCI_PASID + u16 pasid_cap; /* PASID Capability offset */ u16 pasid_features; #endif #ifdef CONFIG_PCI_P2PDMA -- cgit v1.2.3 From e5adf79a1d8086aefa56f48eeb08f8fe4e054a3d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 9 Oct 2019 16:07:51 -0500 Subject: PCI/ATS: Cache PRI PRG Response PASID Required bit The PRG Response PASID Required bit in the PRI Capability is read-only. Read it once when we enumerate the device and cache the value so we don't need to read it again. Based-on-patch-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6542100bd2dd..64d35e730fab 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -456,6 +456,7 @@ struct pci_dev { #ifdef CONFIG_PCI_PRI u16 pri_cap; /* PRI Capability offset */ u32 pri_reqs_alloc; /* Number of PRI requests allocated */ + unsigned int pasid_required:1; /* PRG Response PASID Required */ #endif #ifdef CONFIG_PCI_PASID u16 pasid_cap; /* PASID Capability offset */ -- cgit v1.2.3 From b24d5c2098596a41cf187af41287777a2e0dd753 Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczynski Date: Sat, 14 Sep 2019 23:30:32 +0200 Subject: PCI/ATS: Consolidate ATS declarations in linux/pci-ats.h Move ATS function prototypes from include/linux/pci.h to include/linux/pci-ats.h as the ATS, PRI, and PASID interfaces are related and are used only by the IOMMU drivers. This effectively reverts ff9bee895c4d ("PCI: Move ATS declarations to linux/pci.h so they're all together"). Also, remove surplus forward declaration of struct pci_ats from include/linux/pci.h, as it is no longer needed, since struct pci_ats was embedded directly into struct pci_dev by d544d75ac96a ("PCI: Embed ATS info directly into struct pci_dev"). No functional changes intended. Link: https://lore.kernel.org/r/20190914213032.22314-1-kw@linux.com Signed-off-by: Krzysztof Wilczynski Signed-off-by: Bjorn Helgaas --- include/linux/pci-ats.h | 75 ++++++++++++++++++------------------------------- include/linux/pci.h | 14 --------- 2 files changed, 28 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index a7a2b3d94fcc..67de3a9499bb 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -4,73 +4,54 @@ #include -#ifdef CONFIG_PCI_PRI +#ifdef CONFIG_PCI_ATS +/* Address Translation Service */ +int pci_enable_ats(struct pci_dev *dev, int ps); +void pci_disable_ats(struct pci_dev *dev); +int pci_ats_queue_depth(struct pci_dev *dev); +int pci_ats_page_aligned(struct pci_dev *dev); +#else /* CONFIG_PCI_ATS */ +static inline int pci_enable_ats(struct pci_dev *d, int ps) +{ return -ENODEV; } +static inline void pci_disable_ats(struct pci_dev *d) { } +static inline int pci_ats_queue_depth(struct pci_dev *d) +{ return -ENODEV; } +static inline int pci_ats_page_aligned(struct pci_dev *dev) +{ return 0; } +#endif /* CONFIG_PCI_ATS */ +#ifdef CONFIG_PCI_PRI int pci_enable_pri(struct pci_dev *pdev, u32 reqs); void pci_disable_pri(struct pci_dev *pdev); void pci_restore_pri_state(struct pci_dev *pdev); int pci_reset_pri(struct pci_dev *pdev); int pci_prg_resp_pasid_required(struct pci_dev *pdev); - #else /* CONFIG_PCI_PRI */ - static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) -{ - return -ENODEV; -} - -static inline void pci_disable_pri(struct pci_dev *pdev) -{ -} - -static inline void pci_restore_pri_state(struct pci_dev *pdev) -{ -} - +{ return -ENODEV; } +static inline void pci_disable_pri(struct pci_dev *pdev) { } +static inline void pci_restore_pri_state(struct pci_dev *pdev) { } static inline int pci_reset_pri(struct pci_dev *pdev) -{ - return -ENODEV; -} - +{ return -ENODEV; } static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev) -{ - return 0; -} +{ return 0; } #endif /* CONFIG_PCI_PRI */ #ifdef CONFIG_PCI_PASID - int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); - -#else /* CONFIG_PCI_PASID */ - +#else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) -{ - return -EINVAL; -} - -static inline void pci_disable_pasid(struct pci_dev *pdev) -{ -} - -static inline void pci_restore_pasid_state(struct pci_dev *pdev) -{ -} - +{ return -EINVAL; } +static inline void pci_disable_pasid(struct pci_dev *pdev) { } +static inline void pci_restore_pasid_state(struct pci_dev *pdev) { } static inline int pci_pasid_features(struct pci_dev *pdev) -{ - return -EINVAL; -} - +{ return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) -{ - return -EINVAL; -} +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ - -#endif /* LINUX_PCI_ATS_H*/ +#endif /* LINUX_PCI_ATS_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 64d35e730fab..9fc22f48055e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -284,7 +284,6 @@ struct irq_affinity; struct pcie_link_state; struct pci_vpd; struct pci_sriov; -struct pci_ats; struct pci_p2pdma; /* The pci_dev structure describes PCI devices */ @@ -1778,19 +1777,6 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, NULL); } -#ifdef CONFIG_PCI_ATS -/* Address Translation Service */ -int pci_enable_ats(struct pci_dev *dev, int ps); -void pci_disable_ats(struct pci_dev *dev); -int pci_ats_queue_depth(struct pci_dev *dev); -int pci_ats_page_aligned(struct pci_dev *dev); -#else -static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } -static inline void pci_disable_ats(struct pci_dev *d) { } -static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } -static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } -#endif - /* Include architecture-dependent settings and functions */ #include -- cgit v1.2.3 From c6e9aefbf9db818d60818aa5540d78c1da289aae Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 9 Oct 2019 16:27:30 -0500 Subject: PCI/ATS: Remove unused PRI and PASID stubs The following functions are only used by amd_iommu.c and intel-iommu.c (when CONFIG_INTEL_IOMMU_SVM is enabled). CONFIG_PCI_PRI and CONFIG_PCI_PASID are always defined in those cases, so there's no need for the stubs. pci_enable_pri() pci_disable_pri() pci_reset_pri() pci_prg_resp_pasid_required() pci_enable_pasid() pci_disable_pasid() Remove the unused stubs. Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Joerg Roedel --- include/linux/pci-ats.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 67de3a9499bb..963c11f7c56b 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -27,14 +27,7 @@ void pci_restore_pri_state(struct pci_dev *pdev); int pci_reset_pri(struct pci_dev *pdev); int pci_prg_resp_pasid_required(struct pci_dev *pdev); #else /* CONFIG_PCI_PRI */ -static inline int pci_enable_pri(struct pci_dev *pdev, u32 reqs) -{ return -ENODEV; } -static inline void pci_disable_pri(struct pci_dev *pdev) { } static inline void pci_restore_pri_state(struct pci_dev *pdev) { } -static inline int pci_reset_pri(struct pci_dev *pdev) -{ return -ENODEV; } -static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev) -{ return 0; } #endif /* CONFIG_PCI_PRI */ #ifdef CONFIG_PCI_PASID @@ -44,9 +37,6 @@ void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ -static inline int pci_enable_pasid(struct pci_dev *pdev, int features) -{ return -EINVAL; } -static inline void pci_disable_pasid(struct pci_dev *pdev) { } static inline void pci_restore_pasid_state(struct pci_dev *pdev) { } static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } -- cgit v1.2.3 From fef2dd8b3966517172514ea5a89104ba7745678b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 9 Oct 2019 16:47:15 -0500 Subject: PCI/ATS: Make pci_restore_pri_state(), pci_restore_pasid_state() private These interfaces: void pci_restore_pri_state(struct pci_dev *pdev); void pci_restore_pasid_state(struct pci_dev *pdev); are only used in drivers/pci and do not need to be seen by the rest of the kernel. Most them to drivers/pci/pci.h so they're private to the PCI subsystem. Signed-off-by: Bjorn Helgaas Reviewed-by: Joerg Roedel --- include/linux/pci-ats.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 963c11f7c56b..5d62e78946a3 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -23,21 +23,16 @@ static inline int pci_ats_page_aligned(struct pci_dev *dev) #ifdef CONFIG_PCI_PRI int pci_enable_pri(struct pci_dev *pdev, u32 reqs); void pci_disable_pri(struct pci_dev *pdev); -void pci_restore_pri_state(struct pci_dev *pdev); int pci_reset_pri(struct pci_dev *pdev); int pci_prg_resp_pasid_required(struct pci_dev *pdev); -#else /* CONFIG_PCI_PRI */ -static inline void pci_restore_pri_state(struct pci_dev *pdev) { } #endif /* CONFIG_PCI_PRI */ #ifdef CONFIG_PCI_PASID int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); -void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ -static inline void pci_restore_pasid_state(struct pci_dev *pdev) { } static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) -- cgit v1.2.3 From 554032cdfbf4491f38241a3f6b27459408d90df3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 15 Oct 2019 11:28:46 +0100 Subject: net: phylink: use more linkmode_* Use more linkmode_* helpers rather than open-coding the bitmap operations. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/linkmode.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index a99c58866860..fe740031339d 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -82,4 +82,10 @@ static inline int linkmode_equal(const unsigned long *src1, return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline int linkmode_subset(const unsigned long *src1, + const unsigned long *src2) +{ + return bitmap_subset(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + #endif /* __LINKMODE_H */ -- cgit v1.2.3 From 299b610117a4145dfe15963f0ea037ab319ce531 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Mon, 14 Oct 2019 23:46:21 +0200 Subject: rtc: ds1685: add indirect access method and remove plat_read/plat_write SGI Octane (IP30) doesn't have RTC register directly mapped into CPU address space, but accesses RTC registers with an address and data register. This is now supported by additional access functions, which are selected by a new field in platform data. Removed plat_read/plat_write since there is no user and their usage could introduce lifetime issue, when functions are placed in different modules. Signed-off-by: Thomas Bogendoerfer Acked-by: Joshua Kinard Reviewed-by: Joshua Kinard Link: https://lore.kernel.org/r/20191014214621.25257-1-tbogendoerfer@suse.de Signed-off-by: Alexandre Belloni --- include/linux/rtc/ds1685.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtc/ds1685.h b/include/linux/rtc/ds1685.h index 101c7adc05a2..67ee9d20cc5a 100644 --- a/include/linux/rtc/ds1685.h +++ b/include/linux/rtc/ds1685.h @@ -42,6 +42,7 @@ struct ds1685_priv { struct rtc_device *dev; void __iomem *regs; + void __iomem *data; u32 regstep; int irq_num; bool bcd_mode; @@ -70,12 +71,13 @@ struct ds1685_rtc_platform_data { const bool bcd_mode; const bool no_irq; const bool uie_unsupported; - const bool alloc_io_resources; - u8 (*plat_read)(struct ds1685_priv *, int); - void (*plat_write)(struct ds1685_priv *, int, u8); void (*plat_prepare_poweroff)(void); void (*plat_wake_alarm)(void); void (*plat_post_ram_clear)(void); + enum { + ds1685_reg_direct, + ds1685_reg_indirect + } access_type; }; -- cgit v1.2.3 From e502ff8606b32df4f9f2435ab00278312db125b3 Mon Sep 17 00:00:00 2001 From: Tejas Patel Date: Mon, 26 Aug 2019 13:30:44 -0700 Subject: soc: xilinx: Set CAP_UNUSABLE requirement for versal while powering down domain For "0" requirement which is used to inform firmware that device is not required currently by master, Versal PLM (Platform Loader and Manager) which runs on Platform Management Controller and is responsible platform management of devices that disables clock, power it down and reset the device. genpd_power_off() is being called during runtime suspend also. So, if any device goes to runtime suspend state during resumes it needs to be re-initialized again. It is possible that drivers do not reinitialize device upon resume from runtime suspend every time ans so dont want it to be powered down or get reset during runtime suspend. In Versal PLM new PM_CAP_UNUSABLE capability is added, which disables clock only and avoids power down and reset during runtime suspend. Power and reset will be gated with core suspend.So, this patch sets CAPABILITY_UNUSABLE requirement during gpd_power_off() if platform is other than zynqmp. Signed-off-by: Tejas Patel Signed-off-by: Jolly Shah Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 778abbbc7d94..adb14bcedca2 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -2,7 +2,7 @@ /* * Xilinx Zynq MPSoC Firmware layer * - * Copyright (C) 2014-2018 Xilinx + * Copyright (C) 2014-2019 Xilinx * * Michal Simek * Davorin Mista @@ -46,6 +46,7 @@ #define ZYNQMP_PM_CAPABILITY_ACCESS 0x1U #define ZYNQMP_PM_CAPABILITY_CONTEXT 0x2U #define ZYNQMP_PM_CAPABILITY_WAKEUP 0x4U +#define ZYNQMP_PM_CAPABILITY_UNUSABLE 0x8U /* * Firmware FPGA Manager flags -- cgit v1.2.3 From e40d38f28c10e3010b2828f2c737b50fb81bda8f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:29 +0200 Subject: debugfs: remove return value of debugfs_create_x16() No one checks the return value of debugfs_create_x16(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20191011132931.1186197-6-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 1d859bc657bd..e742081c6844 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -109,8 +109,8 @@ struct dentry *debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value); struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value); -struct dentry *debugfs_create_x16(const char *name, umode_t mode, - struct dentry *parent, u16 *value); +void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, + u16 *value); struct dentry *debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value); struct dentry *debugfs_create_x64(const char *name, umode_t mode, @@ -275,12 +275,8 @@ static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_x16(const char *name, umode_t mode, - struct dentry *parent, - u16 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_x16(const char *name, umode_t mode, + struct dentry *parent, u16 *value) { } static inline struct dentry *debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, -- cgit v1.2.3 From f5cb0a7e64f41b6f1c5cacc64a476962f5e97f91 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:30 +0200 Subject: debugfs: remove return value of debugfs_create_x32() No one checks the return value of debugfs_create_x32(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20191011132931.1186197-7-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index e742081c6844..89d0b02f82f5 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -111,8 +111,8 @@ struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value); void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); -struct dentry *debugfs_create_x32(const char *name, umode_t mode, - struct dentry *parent, u32 *value); +void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, + u32 *value); struct dentry *debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value); void debugfs_create_size_t(const char *name, umode_t mode, @@ -278,12 +278,8 @@ static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode, static inline void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { } -static inline struct dentry *debugfs_create_x32(const char *name, umode_t mode, - struct dentry *parent, - u32 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value) { } static inline struct dentry *debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, -- cgit v1.2.3 From 0864c408fb1e5b02d817cc8cd5b794d4cb491d50 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:31 +0200 Subject: debugfs: remove return value of debugfs_create_x64() No one checks the return value of debugfs_create_x64(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20191011132931.1186197-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 89d0b02f82f5..33690949b45d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -113,8 +113,8 @@ void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value); -struct dentry *debugfs_create_x64(const char *name, umode_t mode, - struct dentry *parent, u64 *value); +void debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, + u64 *value); void debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value); struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, @@ -281,12 +281,8 @@ static inline void debugfs_create_x16(const char *name, umode_t mode, static inline void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, u32 *value) { } -static inline struct dentry *debugfs_create_x64(const char *name, umode_t mode, - struct dentry *parent, - u64 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) { } static inline void debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value) -- cgit v1.2.3 From 2203cbf2c8b58a1e3bef98c47531d431d11639a0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 15 Oct 2019 11:38:39 +0100 Subject: net: sfp: move fwnode parsing into sfp-bus layer Rather than parsing the sfp firmware node in phylink, parse it in the sfp-bus code, so we can re-use this code for PHYs without having to duplicate the parsing. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 1c35428e98bc..355a08a76fd4 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -508,9 +508,9 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, u8 *data); void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); -struct sfp_bus *sfp_register_upstream(struct fwnode_handle *fwnode, - void *upstream, - const struct sfp_upstream_ops *ops); +struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, + void *upstream, + const struct sfp_upstream_ops *ops); void sfp_unregister_upstream(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, @@ -553,11 +553,11 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_register_upstream( +static inline struct sfp_bus *sfp_register_upstream_node( struct fwnode_handle *fwnode, void *upstream, const struct sfp_upstream_ops *ops) { - return (struct sfp_bus *)-1; + return NULL; } static inline void sfp_unregister_upstream(struct sfp_bus *bus) -- cgit v1.2.3 From ca58fbe06c54795f00db79e447f94c2028d30124 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 11 Oct 2019 00:30:37 +0200 Subject: netfilter: add and use nf_hook_slow_list() At this time, NF_HOOK_LIST() macro will iterate the list and then calls nf_hook() for each individual skb. This makes it so the entire list is passed into the netfilter core. The advantage is that we only need to fetch the rule blob once per list instead of per-skb. NF_HOOK_LIST now only works for ipv4 and ipv6, as those are the only callers. v2: use skb_list_del_init() instead of list_del (Edward Cree) Signed-off-by: Florian Westphal Acked-by: Edward Cree Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 77ebb61faf48..eb312e7ca36e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -199,6 +199,8 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *e, unsigned int i); +void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, + const struct nf_hook_entries *e); /** * nf_hook - call a netfilter hook * @@ -311,17 +313,36 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct sk_buff *skb, *next; - struct list_head sublist; - - INIT_LIST_HEAD(&sublist); - list_for_each_entry_safe(skb, next, head, list) { - list_del(&skb->list); - if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1) - list_add_tail(&skb->list, &sublist); + struct nf_hook_entries *hook_head = NULL; + +#ifdef CONFIG_JUMP_LABEL + if (__builtin_constant_p(pf) && + __builtin_constant_p(hook) && + !static_key_false(&nf_hooks_needed[pf][hook])) + return; +#endif + + rcu_read_lock(); + switch (pf) { + case NFPROTO_IPV4: + hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); + break; + case NFPROTO_IPV6: + hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); + break; + default: + WARN_ON_ONCE(1); + break; } - /* Put passed packets back on main list */ - list_splice(&sublist, head); + + if (hook_head) { + struct nf_hook_state state; + + nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); + + nf_hook_slow_list(head, &state, hook_head); + } + rcu_read_unlock(); } /* Call setsockopt() */ -- cgit v1.2.3 From 92df01e3601fe29eb3727a82705eafa6209053f5 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 11 Oct 2019 12:15:20 -0700 Subject: driver: core: Improve documentation for fwnode_operations.add_links() The add_links() ops shouldn't return on the first failed device link add. It needs to continue trying to add device links to other suppliers that are available. The documentation didn't explain WHY this behavior is necessary. So, update the documentation with an example that explains why this is necessary. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20191011191521.179614-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 6ae05b9ce359..97223e2410bd 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -71,8 +71,25 @@ struct fwnode_reference_args { * links to all the suppliers of the device that are available at * the time this function is called. The function must NOT stop * at the first failed device link if other unlinked supplier - * devices are present in the system. If some suppliers are not - * yet available, this function will be called again when other + * devices are present in the system. This is necessary for the + * driver/bus sync_state() callbacks to work correctly. + * + * For example, say Device-C depends on suppliers Device-S1 and + * Device-S2 and the dependency is listed in that order in the + * firmware. Say, S1 gets populated from the firmware after + * late_initcall_sync(). Say S2 is populated and probed way + * before that in device_initcall(). When C is populated, if this + * add_links() function doesn't continue past a "failed linking to + * S1" and continue linking C to S2, then S2 will get a + * sync_state() callback before C is probed. This is because from + * the perspective of S2, C was never a consumer when its + * sync_state() evaluation is done. To avoid this, the add_links() + * function has to go through all available suppliers of the + * device (that corresponds to this fwnode) and link to them + * before returning. + * + * If some suppliers are not yet available (indicated by an error + * return value), this function will be called again when other * devices are added to allow creating device links to any newly * available suppliers. * -- cgit v1.2.3 From 3d6d8da48d0b214d65ea0227d47228abc75d7c88 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 17 Oct 2019 12:18:28 +0200 Subject: pidfd: check pid has attached task in fdinfo Currently, when a task is dead we still print the pid it used to use in the fdinfo files of its pidfds. This doesn't make much sense since the pid may have already been reused. So verify that the task is still alive by introducing the pid_has_task() helper which will be used by other callers in follow-up patches. If the task is not alive anymore, we will print -1. This allows us to differentiate between a task not being present in a given pid namespace - in which case we already print 0 - and a task having been reaped. Note that this uses PIDTYPE_PID for the check. Technically, we could've checked PIDTYPE_TGID since pidfds currently only refer to thread-group leaders but if they won't anymore in the future then this check becomes problematic without it being immediately obvious to non-experts imho. If a thread is created via clone(CLONE_THREAD) than struct pid has a single non-empty list pid->tasks[PIDTYPE_PID] and this pid can't be used as a PIDTYPE_TGID meaning pid->tasks[PIDTYPE_TGID] will return NULL even though the thread-group leader might still be very much alive. So checking PIDTYPE_PID is fine and is easier to maintain should we ever allow pidfds to refer to threads. Cc: Jann Horn Cc: Christian Kellner Cc: linux-api@vger.kernel.org Signed-off-by: Christian Brauner Reviewed-by: Oleg Nesterov Link: https://lore.kernel.org/r/20191017101832.5985-1-christian.brauner@ubuntu.com --- include/linux/pid.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 9645b1194c98..034e3cd60dc0 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -85,6 +85,10 @@ static inline struct pid *get_pid(struct pid *pid) extern void put_pid(struct pid *pid); extern struct task_struct *pid_task(struct pid *pid, enum pid_type); +static inline bool pid_has_task(struct pid *pid, enum pid_type type) +{ + return !hlist_empty(&pid->tasks[type]); +} extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); -- cgit v1.2.3 From 57f5677e535ba24b8926a7125be2ef8d7f09323c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 15 Oct 2019 21:07:05 +0200 Subject: printf: add support for printing symbolic error names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has been suggested several times to extend vsnprintf() to be able to convert the numeric value of ENOSPC to print "ENOSPC". This implements that as a %p extension: With %pe, one can do if (IS_ERR(foo)) { pr_err("Sorry, can't do that: %pe\n", foo); return PTR_ERR(foo); } instead of what is seen in quite a few places in the kernel: if (IS_ERR(foo)) { pr_err("Sorry, can't do that: %ld\n", PTR_ERR(foo)); return PTR_ERR(foo); } If the value passed to %pe is an ERR_PTR, but the library function errname() added here doesn't know about the value, the value is simply printed in decimal. If the value passed to %pe is not an ERR_PTR, we treat it as an ordinary %p and thus print the hashed value (passing non-ERR_PTR values to %pe indicates a bug in the caller, but we can't do much about that). With my embedded hat on, and because it's not very invasive to do, I've made it possible to remove this. The errname() function and associated lookup tables take up about 3K. For most, that's probably quite acceptable and a price worth paying for more readable dmesg (once this starts getting used), while for those that disable printk() it's of very little use - I don't see a procfs/sysfs/seq_printf() file reasonably making use of this - and they clearly want to squeeze vmlinux as much as possible. Hence the default y if PRINTK. The symbols to include have been found by massaging the output of find arch include -iname 'errno*.h' | xargs grep -E 'define\s*E' In the cases where some common aliasing exists (e.g. EAGAIN=EWOULDBLOCK on all platforms, EDEADLOCK=EDEADLK on most), I've moved the more popular one (in terms of 'git grep -w Efoo | wc) to the bottom so that one takes precedence. Link: http://lkml.kernel.org/r/20191015190706.15989-1-linux@rasmusvillemoes.dk To: "Jonathan Corbet" To: linux-kernel@vger.kernel.org Cc: "Andy Shevchenko" Cc: "Andrew Morton" Cc: "Joe Perches" Cc: linux-doc@vger.kernel.org Signed-off-by: Rasmus Villemoes Acked-by: Uwe Kleine-König Reviewed-by: Petr Mladek [andy.shevchenko@gmail.com: use abs()] Acked-by: Andy Shevchenko Signed-off-by: Petr Mladek --- include/linux/errname.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/errname.h (limited to 'include/linux') diff --git a/include/linux/errname.h b/include/linux/errname.h new file mode 100644 index 000000000000..e8576ad90cb7 --- /dev/null +++ b/include/linux/errname.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_ERRNAME_H +#define _LINUX_ERRNAME_H + +#include + +#ifdef CONFIG_SYMBOLIC_ERRNAME +const char *errname(int err); +#else +static inline const char *errname(int err) +{ + return NULL; +} +#endif + +#endif /* _LINUX_ERRNAME_H */ -- cgit v1.2.3 From 32adcaa010fa85e09296a6a606ad07348ef349ed Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Thu, 8 Aug 2019 15:47:24 +0200 Subject: ARM: mmp: move cputype.h to include/linux/soc/ Let's move cputype.h away from mach-mmp/ so that the drivers outside that directory are able to tell the precise silicon revision. The MMP3 USB OTG PHY driver needs this. Signed-off-by: Lubomir Rintel --- include/linux/soc/mmp/cputype.h | 85 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 include/linux/soc/mmp/cputype.h (limited to 'include/linux') diff --git a/include/linux/soc/mmp/cputype.h b/include/linux/soc/mmp/cputype.h new file mode 100644 index 000000000000..c3ec88983e94 --- /dev/null +++ b/include/linux/soc/mmp/cputype.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_CPUTYPE_H +#define __ASM_MACH_CPUTYPE_H + +#include + +/* + * CPU Stepping CPU_ID CHIP_ID + * + * PXA168 S0 0x56158400 0x0000C910 + * PXA168 A0 0x56158400 0x00A0A168 + * PXA910 Y1 0x56158400 0x00F2C920 + * PXA910 A0 0x56158400 0x00F2C910 + * PXA910 A1 0x56158400 0x00A0C910 + * PXA920 Y0 0x56158400 0x00F2C920 + * PXA920 A0 0x56158400 0x00A0C920 + * PXA920 A1 0x56158400 0x00A1C920 + * MMP2 Z0 0x560f5811 0x00F00410 + * MMP2 Z1 0x560f5811 0x00E00410 + * MMP2 A0 0x560f5811 0x00A0A610 + * MMP3 A0 0x562f5842 0x00A02128 + * MMP3 B0 0x562f5842 0x00B02128 + */ + +extern unsigned int mmp_chip_id; + +#ifdef CONFIG_CPU_PXA168 +static inline int cpu_is_pxa168(void) +{ + return (((read_cpuid_id() >> 8) & 0xff) == 0x84) && + ((mmp_chip_id & 0xfff) == 0x168); +} +#else +#define cpu_is_pxa168() (0) +#endif + +/* cpu_is_pxa910() is shared on both pxa910 and pxa920 */ +#ifdef CONFIG_CPU_PXA910 +static inline int cpu_is_pxa910(void) +{ + return (((read_cpuid_id() >> 8) & 0xff) == 0x84) && + (((mmp_chip_id & 0xfff) == 0x910) || + ((mmp_chip_id & 0xfff) == 0x920)); +} +#else +#define cpu_is_pxa910() (0) +#endif + +#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) +static inline int cpu_is_mmp2(void) +{ + return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && + (((mmp_chip_id & 0xfff) == 0x410) || + ((mmp_chip_id & 0xfff) == 0x610)); +} +#else +#define cpu_is_mmp2() (0) +#endif + +#ifdef CONFIG_MACH_MMP3_DT +static inline int cpu_is_mmp3(void) +{ + return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && + ((mmp_chip_id & 0xffff) == 0x2128); +} + +static inline int cpu_is_mmp3_a0(void) +{ + return (cpu_is_mmp3() && + ((mmp_chip_id & 0x00ff0000) == 0x00a00000)); +} + +static inline int cpu_is_mmp3_b0(void) +{ + return (cpu_is_mmp3() && + ((mmp_chip_id & 0x00ff0000) == 0x00b00000)); +} + +#else +#define cpu_is_mmp3() (0) +#define cpu_is_mmp3_a0() (0) +#define cpu_is_mmp3_b0() (0) +#endif + +#endif /* __ASM_MACH_CPUTYPE_H */ -- cgit v1.2.3 From 7c6a469e3416fa23568c2395a3faa7dd6e376dcb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:56 -0700 Subject: bpf: Add typecast to bpf helpers to help BTF generation When pahole converts dwarf to btf it emits only used types. Wrap existing bpf helper functions into typedef and use it in typecast to make gcc emits this type into dwarf. Then pahole will convert it to btf. The "btf_#name_of_helper" types will be used to figure out types of arguments of bpf helpers. The generated code before and after is the same. Only dwarf and btf sections are different. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-3-ast@kernel.org --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 2ce57645f3cd..d3d51d7aff2c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -464,10 +464,11 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) #define BPF_CALL_x(x, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ - return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -- cgit v1.2.3 From 8580ac9404f6240668a026785d7d8856f0530409 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:57 -0700 Subject: bpf: Process in-kernel BTF If in-kernel BTF exists parse it and prepare 'struct btf *btf_vmlinux' for further use by the verifier. In-kernel BTF is trusted just like kallsyms and other build artifacts embedded into vmlinux. Yet run this BTF image through BTF verifier to make sure that it is valid and it wasn't mangled during the build. If in-kernel BTF is incorrect it means either gcc or pahole or kernel are buggy. In such case disallow loading BPF programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-4-ast@kernel.org --- include/linux/bpf_verifier.h | 4 +++- include/linux/btf.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 26a6d58ca78c..713efae62e96 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -330,10 +330,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) #define BPF_LOG_STATS 4 #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) #define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) +#define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { - return log->level && log->ubuf && !bpf_verifier_log_full(log); + return (log->level && log->ubuf && !bpf_verifier_log_full(log)) || + log->level == BPF_LOG_KERNEL; } #define BPF_MAX_SUBPROGS 256 diff --git a/include/linux/btf.h b/include/linux/btf.h index 64cdf2a23d42..55d43bc856be 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -56,6 +56,7 @@ bool btf_type_is_void(const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); +struct btf *btf_parse_vmlinux(void); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) -- cgit v1.2.3 From ccfe29eb29c2edcea6552072ef00ff4117f53e83 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:58 -0700 Subject: bpf: Add attach_btf_id attribute to program load Add attach_btf_id attribute to prog_load command. It's similar to existing expected_attach_type attribute which is used in several cgroup based program types. Unfortunately expected_attach_type is ignored for tracing programs and cannot be reused for new purpose. Hence introduce attach_btf_id to verify bpf programs against given in-kernel BTF type id at load time. It is strictly checked to be valid for raw_tp programs only. In a later patches it will become: btf_id == 0 semantics of existing raw_tp progs. btd_id > 0 raw_tp with BTF and additional type safety. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-5-ast@kernel.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 282e28bf41ec..f916380675dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -375,6 +375,7 @@ struct bpf_prog_aux { u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ + u32 attach_btf_id; /* in-kernel BTF type id to attach to */ bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; struct bpf_prog **func; -- cgit v1.2.3 From 9e15db66136a14cde3f35691f1d839d950118826 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:00 -0700 Subject: bpf: Implement accurate raw_tp context access via BTF libbpf analyzes bpf C program, searches in-kernel BTF for given type name and stores it into expected_attach_type. The kernel verifier expects this btf_id to point to something like: typedef void (*btf_trace_kfree_skb)(void *, struct sk_buff *skb, void *loc); which represents signature of raw_tracepoint "kfree_skb". Then btf_ctx_access() matches ctx+0 access in bpf program with 'skb' and 'ctx+8' access with 'loc' arguments of "kfree_skb" tracepoint. In first case it passes btf_id of 'struct sk_buff *' back to the verifier core and 'void *' in second case. Then the verifier tracks PTR_TO_BTF_ID as any other pointer type. Like PTR_TO_SOCKET points to 'struct bpf_sock', PTR_TO_TCP_SOCK points to 'struct bpf_tcp_sock', and so on. PTR_TO_BTF_ID points to in-kernel structs. If 1234 is btf_id of 'struct sk_buff' in vmlinux's BTF then PTR_TO_BTF_ID#1234 points to one of in kernel skbs. When PTR_TO_BTF_ID#1234 is dereferenced (like r2 = *(u64 *)r1 + 32) the btf_struct_access() checks which field of 'struct sk_buff' is at offset 32. Checks that size of access matches type definition of the field and continues to track the dereferenced type. If that field was a pointer to 'struct net_device' the r2's type will be PTR_TO_BTF_ID#456. Where 456 is btf_id of 'struct net_device' in vmlinux's BTF. Such verifier analysis prevents "cheating" in BPF C program. The program cannot cast arbitrary pointer to 'struct sk_buff *' and access it. C compiler would allow type cast, of course, but the verifier will notice type mismatch based on BPF assembly and in-kernel BTF. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-7-ast@kernel.org --- include/linux/bpf.h | 17 ++++++++++++++++- include/linux/bpf_verifier.h | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f916380675dd..028555fcd10d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -16,6 +16,7 @@ #include struct bpf_verifier_env; +struct bpf_verifier_log; struct perf_event; struct bpf_prog; struct bpf_map; @@ -281,6 +282,7 @@ enum bpf_reg_type { PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ + PTR_TO_BTF_ID, /* reg points to kernel struct */ }; /* The information passed from prog-specific *_is_valid_access @@ -288,7 +290,11 @@ enum bpf_reg_type { */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; - int ctx_field_size; + union { + int ctx_field_size; + u32 btf_id; + }; + struct bpf_verifier_log *log; /* for verbose logs */ }; static inline void @@ -483,6 +489,7 @@ struct bpf_event_entry { bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); +const char *kernel_type_name(u32 btf_type_id); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -748,6 +755,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +bool btf_ctx_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info); +int btf_struct_access(struct bpf_verifier_log *log, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 713efae62e96..6e7284ea1468 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -52,6 +52,8 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; + u32 btf_id; /* for PTR_TO_BTF_ID */ + /* Max size from any of the above. */ unsigned long raw; }; @@ -399,6 +401,8 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); +__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, + const char *fmt, ...); static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { -- cgit v1.2.3 From 2a02759ef5f8a34792df22b41d5e10658fd7bbd3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:02 -0700 Subject: bpf: Add support for BTF pointers to interpreter Pointer to BTF object is a pointer to kernel object or NULL. The memory access in the interpreter has to be done via probe_kernel_read to avoid page faults. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-9-ast@kernel.org --- include/linux/filter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d3d51d7aff2c..22ebea2e64ea 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -65,6 +65,9 @@ struct ctl_table_header; /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 +/* unused opcode to mark special load instruction. Same as BPF_ABS */ +#define BPF_PROBE_MEM 0x20 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 -- cgit v1.2.3 From 3dec541b2e632d630fe7142ed44f0b3702ef1f8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:03 -0700 Subject: bpf: Add support for BTF pointers to x86 JIT Pointer to BTF object is a pointer to kernel object or NULL. Such pointers can only be used by BPF_LDX instructions. The verifier changed their opcode from LDX|MEM|size to LDX|PROBE_MEM|size to make JITing easier. The number of entries in extable is the number of BPF_LDX insns that access kernel memory via "pointer to BTF type". Only these load instructions can fault. Since x86 extable is relative it has to be allocated in the same memory region as JITed code. Allocate it prior to last pass of JITing and let the last pass populate it. Pointer to extable in bpf_prog_aux is necessary to make page fault handling fast. Page fault handling is done in two steps: 1. bpf_prog_kallsyms_find() finds BPF program that page faulted. It's done by walking rb tree. 2. then extable for given bpf program is binary searched. This process is similar to how page faulting is done for kernel modules. The exception handler skips over faulting x86 instruction and initializes destination register with zero. This mimics exact behavior of bpf_probe_read (when probe_kernel_read faults dest is zeroed). JITs for other architectures can add support in similar way. Until then they will reject unknown opcode and fallback to interpreter. Since extable should be aligned and placed near JITed code make bpf_jit_binary_alloc() return 4 byte aligned image offset, so that extable aligning formula in bpf_int_jit_compile() doesn't need to rely on internal implementation of bpf_jit_binary_alloc(). On x86 gcc defaults to 16-byte alignment for regular kernel functions due to better performance. JITed code may be aligned to 16 in the future, but it will use 4 in the meantime. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-10-ast@kernel.org --- include/linux/bpf.h | 3 +++ include/linux/extable.h | 10 ++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 028555fcd10d..a7330d75bb94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,7 @@ struct sock; struct seq_file; struct btf; struct btf_type; +struct exception_table_entry; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -423,6 +424,8 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + u32 num_exentries; + struct exception_table_entry *extable; struct bpf_prog_stats __percpu *stats; union { struct work_struct work; diff --git a/include/linux/extable.h b/include/linux/extable.h index 81ecfaa83ad3..4ab9e78f313b 100644 --- a/include/linux/extable.h +++ b/include/linux/extable.h @@ -33,4 +33,14 @@ search_module_extables(unsigned long addr) } #endif /*CONFIG_MODULES*/ +#ifdef CONFIG_BPF_JIT +const struct exception_table_entry *search_bpf_extables(unsigned long addr); +#else +static inline const struct exception_table_entry * +search_bpf_extables(unsigned long addr) +{ + return NULL; +} +#endif + #endif /* _LINUX_EXTABLE_H */ -- cgit v1.2.3 From a7658e1a4164ce2b9eb4a11aadbba38586e93bd6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:04 -0700 Subject: bpf: Check types of arguments passed into helpers Introduce new helper that reuses existing skb perf_event output implementation, but can be called from raw_tracepoint programs that receive 'struct sk_buff *' as tracepoint argument or can walk other kernel data structures to skb pointer. In order to do that teach verifier to resolve true C types of bpf helpers into in-kernel BTF ids. The type of kernel pointer passed by raw tracepoint into bpf program will be tracked by the verifier all the way until it's passed into helper function. For example: kfree_skb() kernel function calls trace_kfree_skb(skb, loc); bpf programs receives that skb pointer and may eventually pass it into bpf_skb_output() bpf helper which in-kernel is implemented via bpf_skb_event_output() kernel function. Its first argument in the kernel is 'struct sk_buff *'. The verifier makes sure that types match all the way. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-11-ast@kernel.org --- include/linux/bpf.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a7330d75bb94..2c2c29b49845 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -213,6 +213,7 @@ enum bpf_arg_type { ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ + ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ }; /* type of values returned from helper functions */ @@ -235,11 +236,17 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; enum bpf_return_type ret_type; - enum bpf_arg_type arg1_type; - enum bpf_arg_type arg2_type; - enum bpf_arg_type arg3_type; - enum bpf_arg_type arg4_type; - enum bpf_arg_type arg5_type; + union { + struct { + enum bpf_arg_type arg1_type; + enum bpf_arg_type arg2_type; + enum bpf_arg_type arg3_type; + enum bpf_arg_type arg4_type; + enum bpf_arg_type arg5_type; + }; + enum bpf_arg_type arg_type[5]; + }; + u32 *btf_id; /* BTF ids of arguments */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -765,6 +772,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); +u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) -- cgit v1.2.3 From 825dbc6ff7a3a063ea91be7d94af940080b0c991 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 15 Oct 2019 11:26:15 +0100 Subject: percpu: add __percpu to SHIFT_PERCPU_PTR The SHIFT_PERCPU_PTR() returns a pointer used by a number of functions that expect the pointer to be __percpu annotated (sparse address space 3). Adding __percpu to this makes the following sparse warnings go away. Note, this then creates the problem the __percup is marked as noderef, which may need removing for some of the internal functions, or to remove other warnings. mm/vmstat.c:385:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:385:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:385:13: got signed char * mm/vmstat.c:385:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:385:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:385:13: got signed char * mm/vmstat.c:385:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:385:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:385:13: got signed char * mm/vmstat.c:385:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:385:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:385:13: got signed char * mm/vmstat.c:401:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:401:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:401:13: got signed char * mm/vmstat.c:401:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:401:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:401:13: got signed char * mm/vmstat.c:401:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:401:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:401:13: got signed char * mm/vmstat.c:401:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:401:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:401:13: got signed char * mm/vmstat.c:429:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:429:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:429:13: got signed char * mm/vmstat.c:429:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:429:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:429:13: got signed char * mm/vmstat.c:429:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:429:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:429:13: got signed char * mm/vmstat.c:429:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:429:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:429:13: got signed char * mm/vmstat.c:445:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:445:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:445:13: got signed char * mm/vmstat.c:445:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:445:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:445:13: got signed char * mm/vmstat.c:445:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:445:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:445:13: got signed char * mm/vmstat.c:445:13: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:445:13: expected signed char [noderef] [usertype] *__p mm/vmstat.c:445:13: got signed char * mm/vmstat.c:763:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:763:29: expected signed char [noderef] *__p mm/vmstat.c:763:29: got signed char * mm/vmstat.c:763:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:763:29: expected signed char [noderef] *__p mm/vmstat.c:763:29: got signed char * mm/vmstat.c:763:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:763:29: expected signed char [noderef] *__p mm/vmstat.c:763:29: got signed char * mm/vmstat.c:763:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:763:29: expected signed char [noderef] *__p mm/vmstat.c:763:29: got signed char * mm/vmstat.c:825:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:825:29: expected signed char [noderef] *__p mm/vmstat.c:825:29: got signed char * mm/vmstat.c:825:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:825:29: expected signed char [noderef] *__p mm/vmstat.c:825:29: got signed char * mm/vmstat.c:825:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:825:29: expected signed char [noderef] *__p mm/vmstat.c:825:29: got signed char * mm/vmstat.c:825:29: warning: incorrect type in initializer (different address spaces) mm/vmstat.c:825:29: expected signed char [noderef] *__p mm/vmstat.c:825:29: got signed char * Signed-off-by: Ben Dooks Signed-off-by: Dennis Zhou --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index a6fabd865211..a49b6c702598 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -229,7 +229,7 @@ do { \ * pointer value. The weird cast keeps both GCC and sparse happy. */ #define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) + RELOC_HIDE((typeof(*(__p)) __kernel __percpu __force *)(__p), (__offset)) #define per_cpu_ptr(ptr, cpu) \ ({ \ -- cgit v1.2.3 From da97e18458fb42d7c00fac5fd1c56a3896ec666e Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 14 Oct 2019 13:03:08 -0400 Subject: perf_event: Add support for LSM and SELinux checks In current mainline, the degree of access to perf_event_open(2) system call depends on the perf_event_paranoid sysctl. This has a number of limitations: 1. The sysctl is only a single value. Many types of accesses are controlled based on the single value thus making the control very limited and coarse grained. 2. The sysctl is global, so if the sysctl is changed, then that means all processes get access to perf_event_open(2) opening the door to security issues. This patch adds LSM and SELinux access checking which will be used in Android to access perf_event_open(2) for the purposes of attaching BPF programs to tracepoints, perf profiling and other operations from userspace. These operations are intended for production systems. 5 new LSM hooks are added: 1. perf_event_open: This controls access during the perf_event_open(2) syscall itself. The hook is called from all the places that the perf_event_paranoid sysctl is checked to keep it consistent with the systctl. The hook gets passed a 'type' argument which controls CPU, kernel and tracepoint accesses (in this context, CPU, kernel and tracepoint have the same semantics as the perf_event_paranoid sysctl). Additionally, I added an 'open' type which is similar to perf_event_paranoid sysctl == 3 patch carried in Android and several other distros but was rejected in mainline [1] in 2016. 2. perf_event_alloc: This allocates a new security object for the event which stores the current SID within the event. It will be useful when the perf event's FD is passed through IPC to another process which may try to read the FD. Appropriate security checks will limit access. 3. perf_event_free: Called when the event is closed. 4. perf_event_read: Called from the read(2) and mmap(2) syscalls for the event. 5. perf_event_write: Called from the ioctl(2) syscalls for the event. [1] https://lwn.net/Articles/696240/ Since Peter had suggest LSM hooks in 2016 [1], I am adding his Suggested-by tag below. To use this patch, we set the perf_event_paranoid sysctl to -1 and then apply selinux checking as appropriate (default deny everything, and then add policy rules to give access to domains that need it). In the future we can remove the perf_event_paranoid sysctl altogether. Suggested-by: Peter Zijlstra Co-developed-by: Peter Zijlstra Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Morris Cc: Arnaldo Carvalho de Melo Cc: rostedt@goodmis.org Cc: Yonghong Song Cc: Kees Cook Cc: Ingo Molnar Cc: Alexei Starovoitov Cc: jeffv@google.com Cc: Jiri Olsa Cc: Daniel Borkmann Cc: primiano@google.com Cc: Song Liu Cc: rsavitski@google.com Cc: Namhyung Kim Cc: Matthew Garrett Link: https://lkml.kernel.org/r/20191014170308.70668-1-joel@joelfernandes.org --- include/linux/lsm_hooks.h | 15 +++++++++++++++ include/linux/perf_event.h | 36 +++++++++++++++++++++++++++++++----- include/linux/security.h | 38 +++++++++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a3763247547c..20d8cf194fb7 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1818,6 +1818,14 @@ union security_list_options { void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); #endif /* CONFIG_BPF_SYSCALL */ int (*locked_down)(enum lockdown_reason what); +#ifdef CONFIG_PERF_EVENTS + int (*perf_event_open)(struct perf_event_attr *attr, int type); + int (*perf_event_alloc)(struct perf_event *event); + void (*perf_event_free)(struct perf_event *event); + int (*perf_event_read)(struct perf_event *event); + int (*perf_event_write)(struct perf_event *event); + +#endif }; struct security_hook_heads { @@ -2060,6 +2068,13 @@ struct security_hook_heads { struct hlist_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ struct hlist_head locked_down; +#ifdef CONFIG_PERF_EVENTS + struct hlist_head perf_event_open; + struct hlist_head perf_event_alloc; + struct hlist_head perf_event_free; + struct hlist_head perf_event_read; + struct hlist_head perf_event_write; +#endif } __randomize_layout; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61448c19a132..587ae4d002f5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -56,6 +56,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -721,6 +722,9 @@ struct perf_event { struct perf_cgroup *cgrp; /* cgroup event is attach to */ #endif +#ifdef CONFIG_SECURITY + void *security; +#endif struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1241,19 +1245,41 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static inline bool perf_paranoid_tracepoint_raw(void) +/* Access to perf_event_open(2) syscall. */ +#define PERF_SECURITY_OPEN 0 + +/* Finer grained perf_event_open(2) access control. */ +#define PERF_SECURITY_CPU 1 +#define PERF_SECURITY_KERNEL 2 +#define PERF_SECURITY_TRACEPOINT 3 + +static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; } -static inline bool perf_paranoid_cpu(void) +static inline int perf_allow_kernel(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 0; + if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_KERNEL); } -static inline bool perf_paranoid_kernel(void) +static inline int perf_allow_cpu(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 1; + if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_CPU); +} + +static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +{ + if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } extern void perf_event_init(void); diff --git a/include/linux/security.h b/include/linux/security.h index a8d59d612d27..4df79ffdc3a0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1894,5 +1894,41 @@ static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ -#endif /* ! __LINUX_SECURITY_H */ +#ifdef CONFIG_PERF_EVENTS +struct perf_event_attr; + +#ifdef CONFIG_SECURITY +extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_alloc(struct perf_event *event); +extern void security_perf_event_free(struct perf_event *event); +extern int security_perf_event_read(struct perf_event *event); +extern int security_perf_event_write(struct perf_event *event); +#else +static inline int security_perf_event_open(struct perf_event_attr *attr, + int type) +{ + return 0; +} + +static inline int security_perf_event_alloc(struct perf_event *event) +{ + return 0; +} + +static inline void security_perf_event_free(struct perf_event *event) +{ +} + +static inline int security_perf_event_read(struct perf_event *event) +{ + return 0; +} +static inline int security_perf_event_write(struct perf_event *event) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_PERF_EVENTS */ + +#endif /* ! __LINUX_SECURITY_H */ -- cgit v1.2.3 From 7684e2c4384d5d1f884b01ab8bff2369e4db0bff Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 17 Oct 2019 13:12:01 -0700 Subject: iomap: iomap that extends beyond EOF should be marked dirty When doing a direct IO that spans the current EOF, and there are written blocks beyond EOF that extend beyond the current write, the only metadata update that needs to be done is a file size extension. However, we don't mark such iomaps as IOMAP_F_DIRTY to indicate that there is IO completion metadata updates required, and hence we may fail to correctly sync file size extensions made in IO completion when O_DSYNC writes are being used and the hardware supports FUA. Hence when setting IOMAP_F_DIRTY, we need to also take into account whether the iomap spans the current EOF. If it does, then we need to mark it dirty so that IO completion will call generic_write_sync() to flush the inode size update to stable storage correctly. Fixes: 3460cac1ca76 ("iomap: Use FUA for pure data O_DSYNC DIO writes") Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: removed the ext4 part; they'll handle it separately] Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 76b14cb729dc..4b25ad6b5edd 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -32,6 +32,8 @@ struct vm_fault; * * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access * written data and requires fdatasync to commit them to persistent storage. + * This needs to take into account metadata changes that *may* be made at IO + * completion, such as file size updates from direct IO. */ #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ #define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ -- cgit v1.2.3 From ffedeeb780dc554eff3d3b16e6a462a26a41d7ec Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:50:41 +0200 Subject: linkage: Introduce new macros for assembler symbols Introduce new C macros for annotations of functions and data in assembly. There is a long-standing mess in macros like ENTRY, END, ENDPROC and similar. They are used in different manners and sometimes incorrectly. So introduce macros with clear use to annotate assembly as follows: a) Support macros for the ones below SYM_T_FUNC -- type used by assembler to mark functions SYM_T_OBJECT -- type used by assembler to mark data SYM_T_NONE -- type used by assembler to mark entries of unknown type They are defined as STT_FUNC, STT_OBJECT, and STT_NOTYPE respectively. According to the gas manual, this is the most portable way. I am not sure about other assemblers, so this can be switched back to %function and %object if this turns into a problem. Architectures can also override them by something like ", @function" if they need. SYM_A_ALIGN, SYM_A_NONE -- align the symbol? SYM_L_GLOBAL, SYM_L_WEAK, SYM_L_LOCAL -- linkage of symbols b) Mostly internal annotations, used by the ones below SYM_ENTRY -- use only if you have to (for non-paired symbols) SYM_START -- use only if you have to (for paired symbols) SYM_END -- use only if you have to (for paired symbols) c) Annotations for code SYM_INNER_LABEL_ALIGN -- only for labels in the middle of code SYM_INNER_LABEL -- only for labels in the middle of code SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one function SYM_FUNC_START_ALIAS -- use where there are two global names for one function SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function SYM_FUNC_START -- use for global functions SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment SYM_FUNC_START_LOCAL -- use for local functions SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment SYM_FUNC_START_WEAK -- use for weak functions SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, SYM_FUNC_START_WEAK, ... For functions with special (non-C) calling conventions: SYM_CODE_START -- use for non-C (special) functions SYM_CODE_START_NOALIGN -- use for non-C (special) functions, w/o alignment SYM_CODE_START_LOCAL -- use for local non-C (special) functions SYM_CODE_START_LOCAL_NOALIGN -- use for local non-C (special) functions, w/o alignment SYM_CODE_END -- the end of SYM_CODE_START_LOCAL or SYM_CODE_START d) For data SYM_DATA_START -- global data symbol SYM_DATA_START_LOCAL -- local data symbol SYM_DATA_END -- the end of the SYM_DATA_START symbol SYM_DATA_END_LABEL -- the labeled end of SYM_DATA_START symbol SYM_DATA -- start+end wrapper around simple global data SYM_DATA_LOCAL -- start+end wrapper around simple local data ========== The macros allow to pair starts and ends of functions and mark functions correctly in the output ELF objects. All users of the old macros in x86 are converted to use these in further patches. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Boris Ostrovsky Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Len Brown Cc: Linus Torvalds Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Mark Rutland Cc: Pavel Machek Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: x86-ml Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20191011115108.12392-2-jslaby@suse.cz --- include/linux/linkage.h | 245 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 237 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 7e020782ade2..f3ae8f3dea2c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -75,32 +75,58 @@ #ifdef __ASSEMBLY__ +/* SYM_T_FUNC -- type used by assembler to mark functions */ +#ifndef SYM_T_FUNC +#define SYM_T_FUNC STT_FUNC +#endif + +/* SYM_T_OBJECT -- type used by assembler to mark data */ +#ifndef SYM_T_OBJECT +#define SYM_T_OBJECT STT_OBJECT +#endif + +/* SYM_T_NONE -- type used by assembler to mark entries of unknown type */ +#ifndef SYM_T_NONE +#define SYM_T_NONE STT_NOTYPE +#endif + +/* SYM_A_* -- align the symbol? */ +#define SYM_A_ALIGN ALIGN +#define SYM_A_NONE /* nothing */ + +/* SYM_L_* -- linkage of symbols */ +#define SYM_L_GLOBAL(name) .globl name +#define SYM_L_WEAK(name) .weak name +#define SYM_L_LOCAL(name) /* nothing */ + #ifndef LINKER_SCRIPT #define ALIGN __ALIGN #define ALIGN_STR __ALIGN_STR +/* === DEPRECATED annotations === */ + #ifndef GLOBAL +/* deprecated, use SYM_DATA*, SYM_ENTRY, or similar */ #define GLOBAL(name) \ .globl name ASM_NL \ name: #endif #ifndef ENTRY +/* deprecated, use SYM_FUNC_START */ #define ENTRY(name) \ - .globl name ASM_NL \ - ALIGN ASM_NL \ - name: + SYM_FUNC_START(name) #endif #endif /* LINKER_SCRIPT */ #ifndef WEAK +/* deprecated, use SYM_FUNC_START_WEAK* */ #define WEAK(name) \ - .weak name ASM_NL \ - ALIGN ASM_NL \ - name: + SYM_FUNC_START_WEAK(name) #endif #ifndef END +/* deprecated, use SYM_FUNC_END, SYM_DATA_END, or SYM_END */ #define END(name) \ .size name, .-name #endif @@ -110,11 +136,214 @@ * static analysis tools such as stack depth analyzer. */ #ifndef ENDPROC +/* deprecated, use SYM_FUNC_END */ #define ENDPROC(name) \ - .type name, @function ASM_NL \ - END(name) + SYM_FUNC_END(name) +#endif + +/* === generic annotations === */ + +/* SYM_ENTRY -- use only if you have to for non-paired symbols */ +#ifndef SYM_ENTRY +#define SYM_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + name: +#endif + +/* SYM_START -- use only if you have to */ +#ifndef SYM_START +#define SYM_START(name, linkage, align...) \ + SYM_ENTRY(name, linkage, align) +#endif + +/* SYM_END -- use only if you have to */ +#ifndef SYM_END +#define SYM_END(name, sym_type) \ + .type name sym_type ASM_NL \ + .size name, .-name +#endif + +/* === code annotations === */ + +/* + * FUNC -- C-like functions (proper stack frame etc.) + * CODE -- non-C code (e.g. irq handlers with different, special stack etc.) + * + * Objtool validates stack for FUNC, but not for CODE. + * Objtool generates debug info for both FUNC & CODE, but needs special + * annotations for each CODE's start (to describe the actual stack frame). + * + * ALIAS -- does not generate debug info -- the aliased function will + */ + +/* SYM_INNER_LABEL_ALIGN -- only for labels in the middle of code */ +#ifndef SYM_INNER_LABEL_ALIGN +#define SYM_INNER_LABEL_ALIGN(name, linkage) \ + .type name SYM_T_NONE ASM_NL \ + SYM_ENTRY(name, linkage, SYM_A_ALIGN) +#endif + +/* SYM_INNER_LABEL -- only for labels in the middle of code */ +#ifndef SYM_INNER_LABEL +#define SYM_INNER_LABEL(name, linkage) \ + .type name SYM_T_NONE ASM_NL \ + SYM_ENTRY(name, linkage, SYM_A_NONE) +#endif + +/* + * SYM_FUNC_START_LOCAL_ALIAS -- use where there are two local names for one + * function + */ +#ifndef SYM_FUNC_START_LOCAL_ALIAS +#define SYM_FUNC_START_LOCAL_ALIAS(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) +#endif + +/* + * SYM_FUNC_START_ALIAS -- use where there are two global names for one + * function + */ +#ifndef SYM_FUNC_START_ALIAS +#define SYM_FUNC_START_ALIAS(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* SYM_FUNC_START -- use for global functions */ +#ifndef SYM_FUNC_START +/* + * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two + * later. + */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#ifndef SYM_FUNC_START_NOALIGN +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#ifndef SYM_FUNC_START_LOCAL +/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) #endif +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#ifndef SYM_FUNC_START_LOCAL_NOALIGN +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) #endif +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#ifndef SYM_FUNC_START_WEAK +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) #endif + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#ifndef SYM_FUNC_START_WEAK_NOALIGN +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) +#endif + +/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */ +#ifndef SYM_FUNC_END_ALIAS +#define SYM_FUNC_END_ALIAS(name) \ + SYM_END(name, SYM_T_FUNC) +#endif + +/* + * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START, + * SYM_FUNC_START_WEAK, ... + */ +#ifndef SYM_FUNC_END +/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */ +#define SYM_FUNC_END(name) \ + SYM_END(name, SYM_T_FUNC) +#endif + +/* SYM_CODE_START -- use for non-C (special) functions */ +#ifndef SYM_CODE_START +#define SYM_CODE_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +/* SYM_CODE_START_NOALIGN -- use for non-C (special) functions, w/o alignment */ +#ifndef SYM_CODE_START_NOALIGN +#define SYM_CODE_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* SYM_CODE_START_LOCAL -- use for local non-C (special) functions */ +#ifndef SYM_CODE_START_LOCAL +#define SYM_CODE_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) +#endif + +/* + * SYM_CODE_START_LOCAL_NOALIGN -- use for local non-C (special) functions, + * w/o alignment + */ +#ifndef SYM_CODE_START_LOCAL_NOALIGN +#define SYM_CODE_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) +#endif + +/* SYM_CODE_END -- the end of SYM_CODE_START_LOCAL, SYM_CODE_START, ... */ +#ifndef SYM_CODE_END +#define SYM_CODE_END(name) \ + SYM_END(name, SYM_T_NONE) +#endif + +/* === data annotations === */ + +/* SYM_DATA_START -- global data symbol */ +#ifndef SYM_DATA_START +#define SYM_DATA_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) +#endif + +/* SYM_DATA_START -- local data symbol */ +#ifndef SYM_DATA_START_LOCAL +#define SYM_DATA_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) +#endif + +/* SYM_DATA_END -- the end of SYM_DATA_START symbol */ +#ifndef SYM_DATA_END +#define SYM_DATA_END(name) \ + SYM_END(name, SYM_T_OBJECT) +#endif + +/* SYM_DATA_END_LABEL -- the labeled end of SYM_DATA_START symbol */ +#ifndef SYM_DATA_END_LABEL +#define SYM_DATA_END_LABEL(name, linkage, label) \ + linkage(label) ASM_NL \ + .type label SYM_T_OBJECT ASM_NL \ + label: \ + SYM_END(name, SYM_T_OBJECT) +#endif + +/* SYM_DATA -- start+end wrapper around simple global data */ +#ifndef SYM_DATA +#define SYM_DATA(name, data...) \ + SYM_DATA_START(name) ASM_NL \ + data ASM_NL \ + SYM_DATA_END(name) +#endif + +/* SYM_DATA_LOCAL -- start+end wrapper around simple local data */ +#ifndef SYM_DATA_LOCAL +#define SYM_DATA_LOCAL(name, data...) \ + SYM_DATA_START_LOCAL(name) ASM_NL \ + data ASM_NL \ + SYM_DATA_END(name) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _LINUX_LINKAGE_H */ -- cgit v1.2.3 From b4edca150106a68d05eaf823d665a355ff19e28b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:50:59 +0200 Subject: x86/asm: Remove the last GLOBAL user and remove the macro Convert the remaining 32bit users and remove the GLOBAL macro finally. In particular, this means to use SYM_ENTRY for the singlestepping hack region. Exclude the global definition of GLOBAL from x86 too. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: Mark Rutland Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Will Deacon Cc: x86-ml Link: https://lkml.kernel.org/r/20191011115108.12392-20-jslaby@suse.cz --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index f3ae8f3dea2c..cb1108dde385 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -105,12 +105,14 @@ /* === DEPRECATED annotations === */ +#ifndef CONFIG_X86 #ifndef GLOBAL /* deprecated, use SYM_DATA*, SYM_ENTRY, or similar */ #define GLOBAL(name) \ .globl name ASM_NL \ name: #endif +#endif #ifndef ENTRY /* deprecated, use SYM_FUNC_START */ -- cgit v1.2.3 From 6dcc5627f6aec4cb1d1494d06a48d8061db06a04 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:51:04 +0200 Subject: x86/asm: Change all ENTRY+ENDPROC to SYM_FUNC_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are all functions which are invoked from elsewhere, so annotate them as global using the new SYM_FUNC_START and their ENDPROC's by SYM_FUNC_END. Make sure ENTRY/ENDPROC is not defined on X86_64, given these were the last users. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Reviewed-by: Rafael J. Wysocki [hibernate] Reviewed-by: Boris Ostrovsky [xen bits] Acked-by: Herbert Xu [crypto] Cc: Allison Randal Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Shevchenko Cc: Ard Biesheuvel Cc: Armijn Hemel Cc: Cao jin Cc: Darren Hart Cc: Dave Hansen Cc: "David S. Miller" Cc: Enrico Weigelt Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jim Mattson Cc: Joerg Roedel Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kate Stewart Cc: "Kirill A. Shutemov" Cc: kvm ML Cc: Len Brown Cc: linux-arch@vger.kernel.org Cc: linux-crypto@vger.kernel.org Cc: linux-efi Cc: linux-efi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Mark Rutland Cc: Matt Fleming Cc: Paolo Bonzini Cc: Pavel Machek Cc: Peter Zijlstra Cc: platform-driver-x86@vger.kernel.org Cc: "Radim Krčmář" Cc: Sean Christopherson Cc: Stefano Stabellini Cc: "Steven Rostedt (VMware)" Cc: Thomas Gleixner Cc: Vitaly Kuznetsov Cc: Wanpeng Li Cc: Wei Huang Cc: x86-ml Cc: xen-devel@lists.xenproject.org Cc: Xiaoyao Li Link: https://lkml.kernel.org/r/20191011115108.12392-25-jslaby@suse.cz --- include/linux/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index cb1108dde385..19f3d796ab5b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -114,11 +114,13 @@ #endif #endif +#ifndef CONFIG_X86_64 #ifndef ENTRY /* deprecated, use SYM_FUNC_START */ #define ENTRY(name) \ SYM_FUNC_START(name) #endif +#endif /* CONFIG_X86_64 */ #endif /* LINKER_SCRIPT */ #ifndef WEAK @@ -133,6 +135,7 @@ .size name, .-name #endif +#ifndef CONFIG_X86_64 /* If symbol 'name' is treated as a subroutine (gets called, and returns) * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of * static analysis tools such as stack depth analyzer. @@ -142,6 +145,7 @@ #define ENDPROC(name) \ SYM_FUNC_END(name) #endif +#endif /* CONFIG_X86_64 */ /* === generic annotations === */ -- cgit v1.2.3 From 5e63306f1629527799e34a9814dd8035df6ca854 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:51:06 +0200 Subject: x86/asm/32: Change all ENTRY+END to SYM_CODE_* Change all assembly code which is marked using END (and not ENDPROC) to appropriate new markings SYM_CODE_START and SYM_CODE_END. And since the last user of END on X86 is gone now, make sure that END is not defined there. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: linux-arch@vger.kernel.org Cc: Mark Rutland Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: "Steven Rostedt (VMware)" Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191011115108.12392-27-jslaby@suse.cz --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 19f3d796ab5b..5ffcf72c8f87 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -129,11 +129,13 @@ SYM_FUNC_START_WEAK(name) #endif +#ifndef CONFIG_X86 #ifndef END /* deprecated, use SYM_FUNC_END, SYM_DATA_END, or SYM_END */ #define END(name) \ .size name, .-name #endif +#endif /* CONFIG_X86 */ #ifndef CONFIG_X86_64 /* If symbol 'name' is treated as a subroutine (gets called, and returns) -- cgit v1.2.3 From 6d685e5318e51b843ca50adeca50dc6300bf2cbb Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:51:07 +0200 Subject: x86/asm/32: Change all ENTRY+ENDPROC to SYM_FUNC_* These are all functions which are invoked from elsewhere, so annotate them as global using the new SYM_FUNC_START and their ENDPROC's by SYM_FUNC_END. Now, ENTRY/ENDPROC can be forced to be undefined on X86, so do so. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Cc: Allison Randal Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Shevchenko Cc: Ard Biesheuvel Cc: Bill Metzenthen Cc: Boris Ostrovsky Cc: Darren Hart Cc: "David S. Miller" Cc: Greg Kroah-Hartman Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-crypto@vger.kernel.org Cc: linux-efi Cc: linux-efi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Mark Rutland Cc: Matt Fleming Cc: Pavel Machek Cc: platform-driver-x86@vger.kernel.org Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Cc: Will Deacon Cc: x86-ml Link: https://lkml.kernel.org/r/20191011115108.12392-28-jslaby@suse.cz --- include/linux/linkage.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5ffcf72c8f87..331a2306312c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -112,15 +112,13 @@ .globl name ASM_NL \ name: #endif -#endif -#ifndef CONFIG_X86_64 #ifndef ENTRY /* deprecated, use SYM_FUNC_START */ #define ENTRY(name) \ SYM_FUNC_START(name) #endif -#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_X86 */ #endif /* LINKER_SCRIPT */ #ifndef WEAK @@ -135,9 +133,7 @@ #define END(name) \ .size name, .-name #endif -#endif /* CONFIG_X86 */ -#ifndef CONFIG_X86_64 /* If symbol 'name' is treated as a subroutine (gets called, and returns) * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of * static analysis tools such as stack depth analyzer. @@ -147,7 +143,7 @@ #define ENDPROC(name) \ SYM_FUNC_END(name) #endif -#endif /* CONFIG_X86_64 */ +#endif /* CONFIG_X86 */ /* === generic annotations === */ -- cgit v1.2.3 From 13fbe784ef6e58d0267a6e183f90ce7826d7d885 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 11 Oct 2019 13:51:08 +0200 Subject: x86/asm: Replace WEAK uses by SYM_INNER_LABEL_ALIGN Use the new SYM_INNER_LABEL_ALIGN for WEAK entries in the middle of x86 assembly functions. And make sure WEAK is not defined for x86 anymore as these were the last users. Signed-off-by: Jiri Slaby Signed-off-by: Borislav Petkov Cc: Andrey Ryabinin Cc: Boris Ostrovsky Cc: Herbert Xu Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: linux-arch@vger.kernel.org Cc: Mark Rutland Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: "Steven Rostedt (VMware)" Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191011115108.12392-29-jslaby@suse.cz --- include/linux/linkage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 331a2306312c..9280209d1f62 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -121,13 +121,13 @@ #endif /* CONFIG_X86 */ #endif /* LINKER_SCRIPT */ +#ifndef CONFIG_X86 #ifndef WEAK /* deprecated, use SYM_FUNC_START_WEAK* */ #define WEAK(name) \ SYM_FUNC_START_WEAK(name) #endif -#ifndef CONFIG_X86 #ifndef END /* deprecated, use SYM_FUNC_END, SYM_DATA_END, or SYM_END */ #define END(name) \ -- cgit v1.2.3 From 036beb0e85f8772acb635c30df573876103b0c21 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 14 Oct 2019 18:20:13 +0800 Subject: platform/chrome: cros_ec: remove unused EC feature Remove unused EC_FEATURE_AUDIO_CODEC. Signed-off-by: Tzung-Bi Shih Acked-By: Benson Leung Link: https://lore.kernel.org/r/20191014180059.01.I374c311eaca0d47944a37b07acbe48fdb74f734d@changeid Signed-off-by: Mark Brown --- include/linux/platform_data/cros_ec_commands.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 98415686cbfa..43b8f7dae4cc 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1277,8 +1277,6 @@ enum ec_feature_code { * MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */ EC_FEATURE_REFINED_TABLET_MODE_HYSTERESIS = 37, - /* EC supports audio codec. */ - EC_FEATURE_AUDIO_CODEC = 38, /* The MCU is a System Companion Processor (SCP). */ EC_FEATURE_SCP = 39, /* The MCU is an Integrated Sensor Hub */ -- cgit v1.2.3 From 727f1c71c780789aeb8f3da2596c65ae008d5d6c Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 14 Oct 2019 18:20:14 +0800 Subject: ASoC: cros_ec_codec: refactor I2S RX Refactor by the following items: - reformat copyright declaration - use more specific name "i2s rx" - use verbose symbol names to separate namespaces - make some short functions inline - remove unused TDM-related code Signed-off-by: Tzung-Bi Shih Acked-By: Benson Leung Link: https://lore.kernel.org/r/20191014180059.02.I43373b9a66dbb70196b3f216b3aa86111c410836@changeid Signed-off-by: Mark Brown --- include/linux/platform_data/cros_ec_commands.h | 120 +++++++++++-------------- 1 file changed, 51 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 43b8f7dae4cc..261ac83bd007 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4466,92 +4466,74 @@ enum mkbp_cec_event { /*****************************************************************************/ -/* Commands for I2S recording on audio codec. */ - -#define EC_CMD_CODEC_I2S 0x00BC -#define EC_WOV_I2S_SAMPLE_RATE 48000 - -enum ec_codec_i2s_subcmd { - EC_CODEC_SET_SAMPLE_DEPTH = 0x0, - EC_CODEC_SET_GAIN = 0x1, - EC_CODEC_GET_GAIN = 0x2, - EC_CODEC_I2S_ENABLE = 0x3, - EC_CODEC_I2S_SET_CONFIG = 0x4, - EC_CODEC_I2S_SET_TDM_CONFIG = 0x5, - EC_CODEC_I2S_SET_BCLK = 0x6, - EC_CODEC_I2S_SUBCMD_COUNT = 0x7, +/* Commands for I2S RX on audio codec. */ + +#define EC_CMD_EC_CODEC_I2S_RX 0x00BC + +enum ec_codec_i2s_rx_subcmd { + EC_CODEC_I2S_RX_ENABLE = 0x0, + EC_CODEC_I2S_RX_DISABLE = 0x1, + EC_CODEC_I2S_RX_SET_GAIN = 0x2, + EC_CODEC_I2S_RX_GET_GAIN = 0x3, + EC_CODEC_I2S_RX_SET_SAMPLE_DEPTH = 0x4, + EC_CODEC_I2S_RX_SET_DAIFMT = 0x5, + EC_CODEC_I2S_RX_SET_BCLK = 0x6, + EC_CODEC_I2S_RX_SUBCMD_COUNT, }; -enum ec_sample_depth_value { - EC_CODEC_SAMPLE_DEPTH_16 = 0, - EC_CODEC_SAMPLE_DEPTH_24 = 1, +enum ec_codec_i2s_rx_sample_depth { + EC_CODEC_I2S_RX_SAMPLE_DEPTH_16 = 0x0, + EC_CODEC_I2S_RX_SAMPLE_DEPTH_24 = 0x1, + EC_CODEC_I2S_RX_SAMPLE_DEPTH_COUNT, }; -enum ec_i2s_config { - EC_DAI_FMT_I2S = 0, - EC_DAI_FMT_RIGHT_J = 1, - EC_DAI_FMT_LEFT_J = 2, - EC_DAI_FMT_PCM_A = 3, - EC_DAI_FMT_PCM_B = 4, - EC_DAI_FMT_PCM_TDM = 5, +enum ec_codec_i2s_rx_daifmt { + EC_CODEC_I2S_RX_DAIFMT_I2S = 0x0, + EC_CODEC_I2S_RX_DAIFMT_RIGHT_J = 0x1, + EC_CODEC_I2S_RX_DAIFMT_LEFT_J = 0x2, + EC_CODEC_I2S_RX_DAIFMT_COUNT, }; -/* - * For subcommand EC_CODEC_GET_GAIN. - */ -struct __ec_align1 ec_codec_i2s_gain { +struct __ec_align1 ec_param_ec_codec_i2s_rx_set_sample_depth { + uint8_t depth; + uint8_t reserved[3]; +}; + +struct __ec_align1 ec_param_ec_codec_i2s_rx_set_gain { uint8_t left; uint8_t right; + uint8_t reserved[2]; }; -struct __ec_todo_unpacked ec_param_codec_i2s_tdm { - int16_t ch0_delay; /* 0 to 496 */ - int16_t ch1_delay; /* -1 to 496 */ - uint8_t adjacent_to_ch0; - uint8_t adjacent_to_ch1; +struct __ec_align1 ec_param_ec_codec_i2s_rx_set_daifmt { + uint8_t daifmt; + uint8_t reserved[3]; }; -struct __ec_todo_packed ec_param_codec_i2s { - /* enum ec_codec_i2s_subcmd */ - uint8_t cmd; - union { - /* - * EC_CODEC_SET_SAMPLE_DEPTH - * Value should be one of ec_sample_depth_value. - */ - uint8_t depth; - - /* - * EC_CODEC_SET_GAIN - * Value should be 0~43 for both channels. - */ - struct ec_codec_i2s_gain gain; - - /* - * EC_CODEC_I2S_ENABLE - * 1 to enable, 0 to disable. - */ - uint8_t i2s_enable; - - /* - * EC_CODEC_I2S_SET_CONFIG - * Value should be one of ec_i2s_config. - */ - uint8_t i2s_config; +struct __ec_align4 ec_param_ec_codec_i2s_rx_set_bclk { + uint32_t bclk; +}; - /* - * EC_CODEC_I2S_SET_TDM_CONFIG - * Value should be one of ec_i2s_config. - */ - struct ec_param_codec_i2s_tdm tdm_param; +struct __ec_align4 ec_param_ec_codec_i2s_rx { + uint8_t cmd; /* enum ec_codec_i2s_rx_subcmd */ + uint8_t reserved[3]; - /* - * EC_CODEC_I2S_SET_BCLK - */ - uint32_t bclk; + union { + struct ec_param_ec_codec_i2s_rx_set_sample_depth + set_sample_depth_param; + struct ec_param_ec_codec_i2s_rx_set_gain + set_gain_param; + struct ec_param_ec_codec_i2s_rx_set_daifmt + set_daifmt_param; + struct ec_param_ec_codec_i2s_rx_set_bclk + set_bclk_param; }; }; +struct __ec_align1 ec_response_ec_codec_i2s_rx_get_gain { + uint8_t left; + uint8_t right; +}; /*****************************************************************************/ /* System commands */ -- cgit v1.2.3 From 8f731d4c92c2ef9434d4d7f84882c6429754164b Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 14 Oct 2019 18:20:15 +0800 Subject: ASoC: cros_ec_codec: extract DMIC EC command from I2S RX Extract DMIC EC command from I2S RX. Setting and getting microphone gains is common features. Signed-off-by: Tzung-Bi Shih Acked-By: Benson Leung Link: https://lore.kernel.org/r/20191014180059.03.I93d9c65964f3c30f85a36d228e31150ff1917706@changeid Signed-off-by: Mark Brown --- include/linux/platform_data/cros_ec_commands.h | 49 +++++++++++++++++++------- 1 file changed, 36 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 261ac83bd007..58e460c015ef 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4466,18 +4466,48 @@ enum mkbp_cec_event { /*****************************************************************************/ +/* Commands for DMIC on audio codec. */ +#define EC_CMD_EC_CODEC_DMIC 0x00BC + +enum ec_codec_dmic_subcmd { + EC_CODEC_DMIC_SET_GAIN = 0x0, + EC_CODEC_DMIC_GET_GAIN = 0x1, + EC_CODEC_DMIC_SUBCMD_COUNT, +}; + +struct __ec_align1 ec_param_ec_codec_dmic_set_gain { + uint8_t left; + uint8_t right; + uint8_t reserved[2]; +}; + +struct __ec_align4 ec_param_ec_codec_dmic { + uint8_t cmd; /* enum ec_codec_dmic_subcmd */ + uint8_t reserved[3]; + + union { + struct ec_param_ec_codec_dmic_set_gain + set_gain_param; + }; +}; + +struct __ec_align1 ec_response_ec_codec_dmic_get_gain { + uint8_t left; + uint8_t right; +}; + +/*****************************************************************************/ + /* Commands for I2S RX on audio codec. */ -#define EC_CMD_EC_CODEC_I2S_RX 0x00BC +#define EC_CMD_EC_CODEC_I2S_RX 0x00BD enum ec_codec_i2s_rx_subcmd { EC_CODEC_I2S_RX_ENABLE = 0x0, EC_CODEC_I2S_RX_DISABLE = 0x1, - EC_CODEC_I2S_RX_SET_GAIN = 0x2, - EC_CODEC_I2S_RX_GET_GAIN = 0x3, - EC_CODEC_I2S_RX_SET_SAMPLE_DEPTH = 0x4, - EC_CODEC_I2S_RX_SET_DAIFMT = 0x5, - EC_CODEC_I2S_RX_SET_BCLK = 0x6, + EC_CODEC_I2S_RX_SET_SAMPLE_DEPTH = 0x2, + EC_CODEC_I2S_RX_SET_DAIFMT = 0x3, + EC_CODEC_I2S_RX_SET_BCLK = 0x4, EC_CODEC_I2S_RX_SUBCMD_COUNT, }; @@ -4521,8 +4551,6 @@ struct __ec_align4 ec_param_ec_codec_i2s_rx { union { struct ec_param_ec_codec_i2s_rx_set_sample_depth set_sample_depth_param; - struct ec_param_ec_codec_i2s_rx_set_gain - set_gain_param; struct ec_param_ec_codec_i2s_rx_set_daifmt set_daifmt_param; struct ec_param_ec_codec_i2s_rx_set_bclk @@ -4530,11 +4558,6 @@ struct __ec_align4 ec_param_ec_codec_i2s_rx { }; }; -struct __ec_align1 ec_response_ec_codec_i2s_rx_get_gain { - uint8_t left; - uint8_t right; -}; - /*****************************************************************************/ /* System commands */ -- cgit v1.2.3 From 104c6f8f7ff859ddd53b69c4af11e83f2971f0c4 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 17 Oct 2019 22:00:09 +0800 Subject: platform/chrome: cros_ec: add common commands for EC codec Add the following common commands: - GET_CAPABILITIES - GET_SHM_ADDR - SET_SHM_ADDR Acked-by: Benson Leung Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20191017213539.04.Idc3c6e1cd94b70bf010249928d4a93c6c90495b7@changeid Signed-off-by: Mark Brown --- include/linux/platform_data/cros_ec_commands.h | 64 +++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 58e460c015ef..3ca0fa9e92a7 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4466,8 +4466,68 @@ enum mkbp_cec_event { /*****************************************************************************/ +/* Commands for audio codec. */ +#define EC_CMD_EC_CODEC 0x00BC + +enum ec_codec_subcmd { + EC_CODEC_GET_CAPABILITIES = 0x0, + EC_CODEC_GET_SHM_ADDR = 0x1, + EC_CODEC_SET_SHM_ADDR = 0x2, + EC_CODEC_SUBCMD_COUNT, +}; + +enum ec_codec_cap { + EC_CODEC_CAP_LAST = 32, +}; + +enum ec_codec_shm_id { + EC_CODEC_SHM_ID_LAST, +}; + +enum ec_codec_shm_type { + EC_CODEC_SHM_TYPE_EC_RAM = 0x0, + EC_CODEC_SHM_TYPE_SYSTEM_RAM = 0x1, +}; + +struct __ec_align1 ec_param_ec_codec_get_shm_addr { + uint8_t shm_id; + uint8_t reserved[3]; +}; + +struct __ec_align4 ec_param_ec_codec_set_shm_addr { + uint64_t phys_addr; + uint32_t len; + uint8_t shm_id; + uint8_t reserved[3]; +}; + +struct __ec_align4 ec_param_ec_codec { + uint8_t cmd; /* enum ec_codec_subcmd */ + uint8_t reserved[3]; + + union { + struct ec_param_ec_codec_get_shm_addr + get_shm_addr_param; + struct ec_param_ec_codec_set_shm_addr + set_shm_addr_param; + }; +}; + +struct __ec_align4 ec_response_ec_codec_get_capabilities { + uint32_t capabilities; +}; + +struct __ec_align4 ec_response_ec_codec_get_shm_addr { + uint64_t phys_addr; + uint32_t len; + uint8_t type; + uint8_t reserved[3]; +}; + +/*****************************************************************************/ + /* Commands for DMIC on audio codec. */ -#define EC_CMD_EC_CODEC_DMIC 0x00BC +#define EC_CMD_EC_CODEC_DMIC 0x00BD enum ec_codec_dmic_subcmd { EC_CODEC_DMIC_SET_GAIN = 0x0, @@ -4500,7 +4560,7 @@ struct __ec_align1 ec_response_ec_codec_dmic_get_gain { /* Commands for I2S RX on audio codec. */ -#define EC_CMD_EC_CODEC_I2S_RX 0x00BD +#define EC_CMD_EC_CODEC_I2S_RX 0x00BE enum ec_codec_i2s_rx_subcmd { EC_CODEC_I2S_RX_ENABLE = 0x0, -- cgit v1.2.3 From f3e82ad43ca538a7e0db0f310e26c5e75db6ba18 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 17 Oct 2019 22:00:10 +0800 Subject: ASoC: cros_ec_codec: read max DMIC gain from EC codec Read max DMIC gain from EC codec instead of DTS. Also removes the dt-binding of max-dmic-gain. Acked-by: Rob Herring Acked-by: Benson Leung Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20191017213539.05.Id4657c864d544634f2b5c1c9b34fa8232ecba44d@changeid Signed-off-by: Mark Brown --- include/linux/platform_data/cros_ec_commands.h | 43 ++++++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 3ca0fa9e92a7..21db0d4d4025 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -4530,30 +4530,53 @@ struct __ec_align4 ec_response_ec_codec_get_shm_addr { #define EC_CMD_EC_CODEC_DMIC 0x00BD enum ec_codec_dmic_subcmd { - EC_CODEC_DMIC_SET_GAIN = 0x0, - EC_CODEC_DMIC_GET_GAIN = 0x1, + EC_CODEC_DMIC_GET_MAX_GAIN = 0x0, + EC_CODEC_DMIC_SET_GAIN_IDX = 0x1, + EC_CODEC_DMIC_GET_GAIN_IDX = 0x2, EC_CODEC_DMIC_SUBCMD_COUNT, }; -struct __ec_align1 ec_param_ec_codec_dmic_set_gain { - uint8_t left; - uint8_t right; +enum ec_codec_dmic_channel { + EC_CODEC_DMIC_CHANNEL_0 = 0x0, + EC_CODEC_DMIC_CHANNEL_1 = 0x1, + EC_CODEC_DMIC_CHANNEL_2 = 0x2, + EC_CODEC_DMIC_CHANNEL_3 = 0x3, + EC_CODEC_DMIC_CHANNEL_4 = 0x4, + EC_CODEC_DMIC_CHANNEL_5 = 0x5, + EC_CODEC_DMIC_CHANNEL_6 = 0x6, + EC_CODEC_DMIC_CHANNEL_7 = 0x7, + EC_CODEC_DMIC_CHANNEL_COUNT, +}; + +struct __ec_align1 ec_param_ec_codec_dmic_set_gain_idx { + uint8_t channel; /* enum ec_codec_dmic_channel */ + uint8_t gain; uint8_t reserved[2]; }; +struct __ec_align1 ec_param_ec_codec_dmic_get_gain_idx { + uint8_t channel; /* enum ec_codec_dmic_channel */ + uint8_t reserved[3]; +}; + struct __ec_align4 ec_param_ec_codec_dmic { uint8_t cmd; /* enum ec_codec_dmic_subcmd */ uint8_t reserved[3]; union { - struct ec_param_ec_codec_dmic_set_gain - set_gain_param; + struct ec_param_ec_codec_dmic_set_gain_idx + set_gain_idx_param; + struct ec_param_ec_codec_dmic_get_gain_idx + get_gain_idx_param; }; }; -struct __ec_align1 ec_response_ec_codec_dmic_get_gain { - uint8_t left; - uint8_t right; +struct __ec_align1 ec_response_ec_codec_dmic_get_max_gain { + uint8_t max_gain; +}; + +struct __ec_align1 ec_response_ec_codec_dmic_get_gain_idx { + uint8_t gain; }; /*****************************************************************************/ -- cgit v1.2.3 From 4f3d957718e7f0ac2b033dbf48c7cddecd0a8dd3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 18 Oct 2019 13:54:25 +0300 Subject: spi: pxa2xx: No need to keep pointer to platform device There is no need to keep a pointer to the platform device. Currently there are no users of it directly, and if there will be in the future we may restore it from pointer to the struct device. Convert all users at the same time. Cc: Russell King Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20191018105429.82782-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index a5d1837e4f35..6facf27865f9 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -206,7 +206,7 @@ enum pxa_ssp_type { }; struct ssp_device { - struct platform_device *pdev; + struct device *dev; struct list_head node; struct clk *clk; -- cgit v1.2.3 From a77fc11156893bd0332a1fb6e314e6268abf720b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 11 Oct 2019 09:18:05 +0200 Subject: mfd: Switch the AB8500 GPADC to IIO The AB8500 GPADC driver is indeed a "general purpose ADC" driver, and while the IIO subsystem did not exist when the driver was first merged, it is never too late to clean things up and move it to the right place. Nowadays IIO provides the right abstractions and interfaces to do generic ADC work in the kernel. We have to cut a bunch of debugfs luggage to make this transition swift, but all these files to is read out the raw values of the ADC and the IIO subsystem already has a standard sysfs ABI for doing exactly this: no debugfs is needed. Acked-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/mfd/abx500/ab8500-gpadc.h | 75 --------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 include/linux/mfd/abx500/ab8500-gpadc.h (limited to 'include/linux') diff --git a/include/linux/mfd/abx500/ab8500-gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h deleted file mode 100644 index 836c944abe2e..000000000000 --- a/include/linux/mfd/abx500/ab8500-gpadc.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2010 ST-Ericsson SA - * - * Author: Arun R Murthy - * Author: Daniel Willerud - * Author: M'boumba Cedric Madianga - */ - -#ifndef _AB8500_GPADC_H -#define _AB8500_GPADC_H - -/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2 - * and ADCHwSel[4:0] in GPADCCtrl3 ) */ -#define BAT_CTRL 0x01 -#define BTEMP_BALL 0x02 -#define MAIN_CHARGER_V 0x03 -#define ACC_DETECT1 0x04 -#define ACC_DETECT2 0x05 -#define ADC_AUX1 0x06 -#define ADC_AUX2 0x07 -#define MAIN_BAT_V 0x08 -#define VBUS_V 0x09 -#define MAIN_CHARGER_C 0x0A -#define USB_CHARGER_C 0x0B -#define BK_BAT_V 0x0C -#define DIE_TEMP 0x0D -#define USB_ID 0x0E -#define XTAL_TEMP 0x12 -#define VBAT_TRUE_MEAS 0x13 -#define BAT_CTRL_AND_IBAT 0x1C -#define VBAT_MEAS_AND_IBAT 0x1D -#define VBAT_TRUE_MEAS_AND_IBAT 0x1E -#define BAT_TEMP_AND_IBAT 0x1F - -/* Virtual channel used only for ibat convertion to ampere - * Battery current conversion (ibat) cannot be requested as a single conversion - * but it is always in combination with other input requests - */ -#define IBAT_VIRTUAL_CHANNEL 0xFF - -#define SAMPLE_1 1 -#define SAMPLE_4 4 -#define SAMPLE_8 8 -#define SAMPLE_16 16 -#define RISING_EDGE 0 -#define FALLING_EDGE 1 - -/* Arbitrary ADC conversion type constants */ -#define ADC_SW 0 -#define ADC_HW 1 - -struct ab8500_gpadc; - -struct ab8500_gpadc *ab8500_gpadc_get(char *name); -int ab8500_gpadc_sw_hw_convert(struct ab8500_gpadc *gpadc, u8 channel, - u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type); -static inline int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel) -{ - return ab8500_gpadc_sw_hw_convert(gpadc, channel, - SAMPLE_16, 0, 0, ADC_SW); -} - -int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel, - u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type); -int ab8500_gpadc_double_read_raw(struct ab8500_gpadc *gpadc, u8 channel, - u8 avg_sample, u8 trig_edge, u8 trig_timer, u8 conv_type, - int *ibat); -int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc, - u8 channel, int ad_value); -void ab8540_gpadc_get_otp(struct ab8500_gpadc *gpadc, - u16 *vmain_l, u16 *vmain_h, u16 *btemp_l, u16 *btemp_h, - u16 *vbat_l, u16 *vbat_h, u16 *ibat_l, u16 *ibat_h); - -#endif /* _AB8500_GPADC_H */ -- cgit v1.2.3 From d49e6ee2d6c2b654c5eeb9aa1c4986cd1bec2582 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 6 Oct 2019 16:03:09 -0400 Subject: counter: Simplify the count_read and count_write callbacks The count_read and count_write callbacks are simplified to pass val as unsigned long rather than as an opaque data structure. The opaque counter_count_read_value and counter_count_write_value structures, counter_count_value_type enum, and relevant counter_count_read_value_set and counter_count_write_value_get functions, are removed as they are no longer used. Cc: Patrick Havelange Acked-by: Fabrice Gasnier Acked-by: David Lechner Signed-off-by: William Breathitt Gray Signed-off-by: Jonathan Cameron --- include/linux/counter.h | 74 ++++++------------------------------------------- 1 file changed, 9 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/counter.h b/include/linux/counter.h index a061cdcdef7c..32fb4d8cc3fd 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -290,53 +290,22 @@ struct counter_device_state { const struct attribute_group **groups; }; -/** - * struct counter_signal_read_value - Opaque Signal read value - * @buf: string representation of Signal read value - * @len: length of string in @buf - */ -struct counter_signal_read_value { - char *buf; - size_t len; -}; - -/** - * struct counter_count_read_value - Opaque Count read value - * @buf: string representation of Count read value - * @len: length of string in @buf - */ -struct counter_count_read_value { - char *buf; - size_t len; -}; - -/** - * struct counter_count_write_value - Opaque Count write value - * @buf: string representation of Count write value - */ -struct counter_count_write_value { - const char *buf; +enum counter_signal_value { + COUNTER_SIGNAL_LOW = 0, + COUNTER_SIGNAL_HIGH }; /** * struct counter_ops - Callbacks from driver * @signal_read: optional read callback for Signal attribute. The read * value of the respective Signal should be passed back via - * the val parameter. val points to an opaque type which - * should be set only by calling the - * counter_signal_read_value_set function from within the - * signal_read callback. + * the val parameter. * @count_read: optional read callback for Count attribute. The read * value of the respective Count should be passed back via - * the val parameter. val points to an opaque type which - * should be set only by calling the - * counter_count_read_value_set function from within the - * count_read callback. + * the val parameter. * @count_write: optional write callback for Count attribute. The write * value for the respective Count is passed in via the val - * parameter. val points to an opaque type which should be - * accessed only by calling the - * counter_count_write_value_get function. + * parameter. * @function_get: function to get the current count function mode. Returns * 0 on success and negative error code on error. The index * of the respective Count's returned function mode should @@ -355,13 +324,11 @@ struct counter_count_write_value { struct counter_ops { int (*signal_read)(struct counter_device *counter, struct counter_signal *signal, - struct counter_signal_read_value *val); + enum counter_signal_value *val); int (*count_read)(struct counter_device *counter, - struct counter_count *count, - struct counter_count_read_value *val); + struct counter_count *count, unsigned long *val); int (*count_write)(struct counter_device *counter, - struct counter_count *count, - struct counter_count_write_value *val); + struct counter_count *count, unsigned long val); int (*function_get)(struct counter_device *counter, struct counter_count *count, size_t *function); int (*function_set)(struct counter_device *counter, @@ -477,29 +444,6 @@ struct counter_device { void *priv; }; -enum counter_signal_level { - COUNTER_SIGNAL_LEVEL_LOW = 0, - COUNTER_SIGNAL_LEVEL_HIGH -}; - -enum counter_signal_value_type { - COUNTER_SIGNAL_LEVEL = 0 -}; - -enum counter_count_value_type { - COUNTER_COUNT_POSITION = 0, -}; - -void counter_signal_read_value_set(struct counter_signal_read_value *const val, - const enum counter_signal_value_type type, - void *const data); -void counter_count_read_value_set(struct counter_count_read_value *const val, - const enum counter_count_value_type type, - void *const data); -int counter_count_write_value_get(void *const data, - const enum counter_count_value_type type, - const struct counter_count_write_value *const val); - int counter_register(struct counter_device *const counter); void counter_unregister(struct counter_device *const counter); int devm_counter_register(struct device *dev, -- cgit v1.2.3 From c5d550fb6e126a73247d4a98d6314a84f4805b98 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 6 Oct 2019 16:03:11 -0400 Subject: counter: Fix typo in action_get description The action_get callback returns a Synapse's action mode. Signed-off-by: William Breathitt Gray Signed-off-by: Jonathan Cameron --- include/linux/counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/counter.h b/include/linux/counter.h index 32fb4d8cc3fd..9dbd5df4cd34 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -315,7 +315,7 @@ enum counter_signal_value { * Count's functions_list array. * @action_get: function to get the current action mode. Returns 0 on * success and negative error code on error. The index of - * the respective Signal's returned action mode should be + * the respective Synapse's returned action mode should be * passed back via the action parameter. * @action_set: function to set the action mode. action is the index of * the requested action mode from the respective Synapse's -- cgit v1.2.3 From af65d1ad416bc6e069ccb9e649faeda224248f96 Mon Sep 17 00:00:00 2001 From: "Patel, Mayurkumar" Date: Fri, 18 Oct 2019 16:52:21 +0000 Subject: PCI/AER: Save AER Capability for suspend/resume Previously we did not save and restore the AER configuration on suspend/resume, so the configuration may be lost after resume. Save the AER configuration during suspend and restore it during resume. [bhelgaas: commit log] Link: https://lore.kernel.org/r/92EBB4272BF81E4089A7126EC1E7B28492C3B007@IRSMSX101.ger.corp.intel.com Signed-off-by: Mayurkumar Patel Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko --- include/linux/aer.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 514bffa11dbb..fa19e01f418a 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -46,6 +46,8 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); +void pci_save_aer_state(struct pci_dev *dev); +void pci_restore_aer_state(struct pci_dev *dev); #else static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) { @@ -63,6 +65,8 @@ static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) { return -EINVAL; } +static inline void pci_save_aer_state(struct pci_dev *dev) {} +static inline void pci_restore_aer_state(struct pci_dev *dev) {} #endif void cper_print_aer(struct pci_dev *dev, int aer_severity, -- cgit v1.2.3 From ae79d5588a04aec9dc4b0c6df700d131447306e0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 19 Oct 2019 09:15:27 +0200 Subject: perf/core: Fix !CONFIG_PERF_EVENTS build warnings and failures sparc64 runs into this warning: include/linux/security.h:1913:52: warning: 'struct perf_event' declared inside parameter list will not be visible outside of this definition or declaration which is escalated to a build error in some of the .c files due to -Werror. Fix it via a forward declaration, like we do for perf_event_attr, the stub inlines don't actually need to know the structure of this struct. Fixes: da97e18458fb: ("perf_event: Add support for LSM and SELinux checks") Cc: "Joel Fernandes (Google)" Cc: linux-kernel@vger.kernel.org Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Signed-off-by: Ingo Molnar --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 4df79ffdc3a0..0a86bfea64d0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1896,6 +1896,7 @@ static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) #ifdef CONFIG_PERF_EVENTS struct perf_event_attr; +struct perf_event; #ifdef CONFIG_SECURITY extern int security_perf_event_open(struct perf_event_attr *attr, int type); -- cgit v1.2.3 From 8d6ac1cec7251a74c2d2f21e72b842c000206811 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 16 Oct 2019 22:16:22 +0200 Subject: rtc: add timestamp for end of 2199 Some RTCs handle date up to 2199. Link: https://lore.kernel.org/r/20191016201626.31309-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 2680f9b2b119..e86a9f307b82 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -165,6 +165,7 @@ struct rtc_device { #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2099 4102444799LL /* 2099-12-31 23:59:59 */ +#define RTC_TIMESTAMP_END_2199 7258118399LL /* 2199-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_9999 253402300799LL /* 9999-12-31 23:59:59 */ extern struct rtc_device *devm_rtc_device_register(struct device *dev, -- cgit v1.2.3 From ae4866884338259318be94fa8f88015cced07000 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sat, 19 Oct 2019 22:50:34 +0200 Subject: rtc: introduce lock helpers Introduce rtc_lock and rtc_unlock to shorten the code when locking and unlocking ops_lock from drivers. Link: https://lore.kernel.org/r/20191019205034.6382-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni --- include/linux/rtc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index e86a9f307b82..4e9d3c71addb 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -159,6 +159,9 @@ struct rtc_device { }; #define to_rtc_device(d) container_of(d, struct rtc_device, dev) +#define rtc_lock(d) mutex_lock(&d->ops_lock) +#define rtc_unlock(d) mutex_unlock(&d->ops_lock) + /* useful timestamps */ #define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */ -- cgit v1.2.3 From 535bbe6a1f94dfc3e23cf1c9687459de7f3d2271 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 16 Sep 2019 14:23:44 -0500 Subject: soundwire: remove DAI_ID_RANGE definitions There is no reason to reserve a range of DAI IDs for SoundWire. This is not scalable and it's better to let the ASoC core allocate the dai->id when registering a component. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20190916192348.467-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index ea787201c3ac..688b40e65c89 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -40,9 +40,6 @@ struct sdw_slave; #define SDW_VALID_PORT_RANGE(n) ((n) <= 14 && (n) >= 1) -#define SDW_DAI_ID_RANGE_START 100 -#define SDW_DAI_ID_RANGE_END 200 - enum { SDW_PORT_DIRN_SINK = 0, SDW_PORT_DIRN_SOURCE, -- cgit v1.2.3 From b6bc07d4360dbf766e551f18e43c67fff6784955 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Sat, 19 Oct 2019 15:02:51 +0800 Subject: ASoC: cros_ec_codec: support WoV 1. Get EC codec's capabilities. 2. Get and set SHM address if any. 3. Transmit language model to EC codec if needed. 4. Start to read audio data from EC codec if receives host event. Signed-off-by: Tzung-Bi Shih Acked-by: Enric Balletbo i Serra Link: https://lore.kernel.org/r/20191019143504.1.I5388b69a7a9c551078fed216a77440cee6dedf49@changeid Signed-off-by: Mark Brown --- include/linux/platform_data/cros_ec_commands.h | 69 ++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 21db0d4d4025..69210881ebac 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -556,6 +556,9 @@ enum host_event_code { /* Keyboard recovery combo with hardware reinitialization */ EC_HOST_EVENT_KEYBOARD_RECOVERY_HW_REINIT = 30, + /* WoV */ + EC_HOST_EVENT_WOV = 31, + /* * The high bit of the event mask is not used as a host event code. If * it reads back as set, then the entire event mask should be @@ -4477,10 +4480,14 @@ enum ec_codec_subcmd { }; enum ec_codec_cap { + EC_CODEC_CAP_WOV_AUDIO_SHM = 0, + EC_CODEC_CAP_WOV_LANG_SHM = 1, EC_CODEC_CAP_LAST = 32, }; enum ec_codec_shm_id { + EC_CODEC_SHM_ID_WOV_AUDIO = 0x0, + EC_CODEC_SHM_ID_WOV_LANG = 0x1, EC_CODEC_SHM_ID_LAST, }; @@ -4641,6 +4648,68 @@ struct __ec_align4 ec_param_ec_codec_i2s_rx { }; }; +/*****************************************************************************/ +/* Commands for WoV on audio codec. */ + +#define EC_CMD_EC_CODEC_WOV 0x00BF + +enum ec_codec_wov_subcmd { + EC_CODEC_WOV_SET_LANG = 0x0, + EC_CODEC_WOV_SET_LANG_SHM = 0x1, + EC_CODEC_WOV_GET_LANG = 0x2, + EC_CODEC_WOV_ENABLE = 0x3, + EC_CODEC_WOV_DISABLE = 0x4, + EC_CODEC_WOV_READ_AUDIO = 0x5, + EC_CODEC_WOV_READ_AUDIO_SHM = 0x6, + EC_CODEC_WOV_SUBCMD_COUNT, +}; + +/* + * @hash is SHA256 of the whole language model. + * @total_len indicates the length of whole language model. + * @offset is the cursor from the beginning of the model. + * @buf is the packet buffer. + * @len denotes how many bytes in the buf. + */ +struct __ec_align4 ec_param_ec_codec_wov_set_lang { + uint8_t hash[32]; + uint32_t total_len; + uint32_t offset; + uint8_t buf[128]; + uint32_t len; +}; + +struct __ec_align4 ec_param_ec_codec_wov_set_lang_shm { + uint8_t hash[32]; + uint32_t total_len; +}; + +struct __ec_align4 ec_param_ec_codec_wov { + uint8_t cmd; /* enum ec_codec_wov_subcmd */ + uint8_t reserved[3]; + + union { + struct ec_param_ec_codec_wov_set_lang + set_lang_param; + struct ec_param_ec_codec_wov_set_lang_shm + set_lang_shm_param; + }; +}; + +struct __ec_align4 ec_response_ec_codec_wov_get_lang { + uint8_t hash[32]; +}; + +struct __ec_align4 ec_response_ec_codec_wov_read_audio { + uint8_t buf[128]; + uint32_t len; +}; + +struct __ec_align4 ec_response_ec_codec_wov_read_audio_shm { + uint32_t offset; + uint32_t len; +}; + /*****************************************************************************/ /* System commands */ -- cgit v1.2.3 From 52525b7a3cf82adec5c6cf0ecbd23ff228badc94 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 18 Oct 2019 15:38:47 +0800 Subject: PCI: Add a helper to check Power Resource Requirements _PR3 existence A driver may want to know the existence of _PR3, to choose different runtime suspend behavior. A user will be add in next patch. This is mostly the same as nouveau_pr3_present(). Signed-off-by: Kai-Heng Feng Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20191018073848.14590-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..1d15c5d49cdd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2310,9 +2310,11 @@ struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus); void pci_msi_register_fwnode_provider(struct fwnode_handle *(*fn)(struct device *)); +bool pci_pr3_present(struct pci_dev *pdev); #else static inline struct irq_domain * pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } +static bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_EEH -- cgit v1.2.3 From d8ede917f5cd472e344be636d62b8e1f10bdae5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Aug 2019 14:42:28 +0200 Subject: jbd2: Remove jbd_trylock_bh_state() No users. Signed-off-by: Thomas Gleixner Reviewed-by: Jan Kara Cc: linux-ext4@vger.kernel.org Cc: "Theodore Ts'o" Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20190809124233.13277-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 603fbc4e2f70..7fb4d98e2adb 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -347,11 +347,6 @@ static inline void jbd_lock_bh_state(struct buffer_head *bh) bit_spin_lock(BH_State, &bh->b_state); } -static inline int jbd_trylock_bh_state(struct buffer_head *bh) -{ - return bit_spin_trylock(BH_State, &bh->b_state); -} - static inline int jbd_is_locked_bh_state(struct buffer_head *bh) { return bit_spin_is_locked(BH_State, &bh->b_state); -- cgit v1.2.3 From 93108ebb848df8d4948d51db14714a14c4e81111 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Aug 2019 14:42:29 +0200 Subject: jbd2: Move dropping of jh reference out of un/re-filing functions __jbd2_journal_unfile_buffer() and __jbd2_journal_refile_buffer() drop transaction's jh reference when they remove jh from a transaction. This will be however inconvenient once we move state lock into journal_head itself as we still need to unlock it and we'd need to grab jh reference just for that. Move dropping of jh reference out of these functions into the few callers. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20190809124233.13277-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 7fb4d98e2adb..d0c77478d49d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1252,7 +1252,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) /* Filing buffers */ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __jbd2_journal_refile_buffer(struct journal_head *); +extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void __journal_free_buffer(struct journal_head *bh); -- cgit v1.2.3 From 464170647b5648bb81f3615567485fcb9a685bed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Aug 2019 14:42:32 +0200 Subject: jbd2: Make state lock a spinlock Bit-spinlocks are problematic on PREEMPT_RT if functions which might sleep on RT, e.g. spin_lock(), alloc/free(), are invoked inside the lock held region because bit spinlocks disable preemption even on RT. A first attempt was to replace state lock with a spinlock placed in struct buffer_head and make the locking conditional on PREEMPT_RT and DEBUG_BIT_SPINLOCKS. Jan pointed out that there is a 4 byte hole in struct journal_head where a regular spinlock fits in and he would not object to convert the state lock to a spinlock unconditionally. Aside of solving the RT problem, this also gains lockdep coverage for the journal head state lock (bit-spinlocks are not covered by lockdep as it's hard to fit a lockdep map into a single bit). The trivial change would have been to convert the jbd_*lock_bh_state() inlines, but that comes with the downside that these functions take a buffer head pointer which needs to be converted to a journal head pointer which adds another level of indirection. As almost all functions which use this lock have a journal head pointer readily available, it makes more sense to remove the lock helper inlines and write out spin_*lock() at all call sites. Fixup all locking comments as well. Suggested-by: Jan Kara Signed-off-by: Thomas Gleixner Signed-off-by: Jan Kara Cc: "Theodore Ts'o" Cc: Mark Fasheh Cc: Joseph Qi Cc: Joel Becker Cc: Jan Kara Cc: linux-ext4@vger.kernel.org Link: https://lore.kernel.org/r/20190809124233.13277-7-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 20 ++------------------ include/linux/journal-head.h | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d0c77478d49d..9cafbc9a76d9 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -313,7 +313,6 @@ enum jbd_state_bits { BH_Revoked, /* Has been revoked from the log */ BH_RevokeValid, /* Revoked flag is valid */ BH_JBDDirty, /* Is dirty but journaled */ - BH_State, /* Pins most journal_head state */ BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ BH_Shadow, /* IO on shadow buffer is running */ BH_Verified, /* Metadata block has been verified ok */ @@ -342,21 +341,6 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) return bh->b_private; } -static inline void jbd_lock_bh_state(struct buffer_head *bh) -{ - bit_spin_lock(BH_State, &bh->b_state); -} - -static inline int jbd_is_locked_bh_state(struct buffer_head *bh) -{ - return bit_spin_is_locked(BH_State, &bh->b_state); -} - -static inline void jbd_unlock_bh_state(struct buffer_head *bh) -{ - bit_spin_unlock(BH_State, &bh->b_state); -} - static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) { bit_spin_lock(BH_JournalHead, &bh->b_state); @@ -551,9 +535,9 @@ struct transaction_chp_stats_s { * ->jbd_lock_bh_journal_head() (This is "innermost") * * j_state_lock - * ->jbd_lock_bh_state() + * ->b_state_lock * - * jbd_lock_bh_state() + * b_state_lock * ->j_list_lock * * j_state_lock diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h index 9fb870524314..75bc56109031 100644 --- a/include/linux/journal-head.h +++ b/include/linux/journal-head.h @@ -11,6 +11,8 @@ #ifndef JOURNAL_HEAD_H_INCLUDED #define JOURNAL_HEAD_H_INCLUDED +#include + typedef unsigned int tid_t; /* Unique transaction ID */ typedef struct transaction_s transaction_t; /* Compound transaction type */ @@ -23,6 +25,11 @@ struct journal_head { */ struct buffer_head *b_bh; + /* + * Protect the buffer head state + */ + spinlock_t b_state_lock; + /* * Reference count - see description in journal.c * [jbd_lock_bh_journal_head()] @@ -30,7 +37,7 @@ struct journal_head { int b_jcount; /* - * Journalling list for this buffer [jbd_lock_bh_state()] + * Journalling list for this buffer [b_state_lock] * NOTE: We *cannot* combine this with b_modified into a bitfield * as gcc would then (which the C standard allows but which is * very unuseful) make 64-bit accesses to the bitfield and clobber @@ -41,20 +48,20 @@ struct journal_head { /* * This flag signals the buffer has been modified by * the currently running transaction - * [jbd_lock_bh_state()] + * [b_state_lock] */ unsigned b_modified; /* * Copy of the buffer data frozen for writing to the log. - * [jbd_lock_bh_state()] + * [b_state_lock] */ char *b_frozen_data; /* * Pointer to a saved copy of the buffer containing no uncommitted * deallocation references, so that allocations can avoid overwriting - * uncommitted deletes. [jbd_lock_bh_state()] + * uncommitted deletes. [b_state_lock] */ char *b_committed_data; @@ -63,7 +70,7 @@ struct journal_head { * metadata: either the running transaction or the committing * transaction (if there is one). Only applies to buffers on a * transaction's data or metadata journaling list. - * [j_list_lock] [jbd_lock_bh_state()] + * [j_list_lock] [b_state_lock] * Either of these locks is enough for reading, both are needed for * changes. */ @@ -73,13 +80,13 @@ struct journal_head { * Pointer to the running compound transaction which is currently * modifying the buffer's metadata, if there was already a transaction * committing it when the new transaction touched it. - * [t_list_lock] [jbd_lock_bh_state()] + * [t_list_lock] [b_state_lock] */ transaction_t *b_next_transaction; /* * Doubly-linked list of buffers on a transaction's data, metadata or - * forget queue. [t_list_lock] [jbd_lock_bh_state()] + * forget queue. [t_list_lock] [b_state_lock] */ struct journal_head *b_tnext, *b_tprev; -- cgit v1.2.3 From 46b4bff6572b0552b1ee062043621e4b252638d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Oct 2019 16:25:20 +0200 Subject: PCI: Fix missing inline for pci_pr3_present() The inline prefix was missing in the dummy function pci_pr3_present() definition. Fix it. Reported-by: kbuild test robot Fixes: 52525b7a3cf8 ("PCI: Add a helper to check Power Resource Requirements _PR3 existence") Link: https://lore.kernel.org/r/201910212111.qHm6OcWx%lkp@intel.com Signed-off-by: Takashi Iwai --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 1d15c5d49cdd..be529d311122 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2314,7 +2314,7 @@ bool pci_pr3_present(struct pci_dev *pdev); #else static inline struct irq_domain * pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } -static bool pci_pr3_present(struct pci_dev *pdev) { return false; } +static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_EEH -- cgit v1.2.3 From 27938fd8ba78b4c7f9a2385b7b52cca19ab891b8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 4 Oct 2019 15:32:07 +0200 Subject: pwm: Update comment on struct pwm_ops::apply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 71523d1812ac (pwm: Ensure pwm_apply_state() doesn't modify the state argument) updated the kernel-doc for pwm_apply_state(), but not for the ->apply callback in the pwm_ops struct. Signed-off-by: Rasmus Villemoes Reviewed-by: Uwe Kleine-König Reviewed-by: Bjorn Andersson Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index b2c9c460947d..0ef808d925bb 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -243,10 +243,7 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM * @capture: capture and report PWM signal - * @apply: atomically apply a new PWM config. The state argument - * should be adjusted with the real hardware config (if the - * approximate the period or duty_cycle value, state should - * reflect it) + * @apply: atomically apply a new PWM config * @get_state: get the current PWM state. This function is only * called once per PWM device when the PWM chip is * registered. -- cgit v1.2.3 From 8e53622594f5530b5a86094464937dda47fc6e3b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 16 Oct 2019 12:42:40 +0200 Subject: pwm: stm32: Remove clutter from ternary operator Remove usage of the ternary operator to assign values for register fields. Instead, parameterize the register and field offset macros and pass the index to them. This removes clutter and improves readability. Signed-off-by: Thierry Reding --- include/linux/mfd/stm32-timers.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 067d14655c28..f8db83aedb2b 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -70,14 +70,11 @@ #define TIM_CCER_CC4E BIT(12) /* Capt/Comp 4 out Ena */ #define TIM_CCER_CC4P BIT(13) /* Capt/Comp 4 Polarity */ #define TIM_CCER_CCXE (BIT(0) | BIT(4) | BIT(8) | BIT(12)) -#define TIM_BDTR_BKE BIT(12) /* Break input enable */ -#define TIM_BDTR_BKP BIT(13) /* Break input polarity */ +#define TIM_BDTR_BKE(x) BIT(12 + (x) * 12) /* Break input enable */ +#define TIM_BDTR_BKP(x) BIT(13 + (x) * 12) /* Break input polarity */ #define TIM_BDTR_AOE BIT(14) /* Automatic Output Enable */ #define TIM_BDTR_MOE BIT(15) /* Main Output Enable */ -#define TIM_BDTR_BKF (BIT(16) | BIT(17) | BIT(18) | BIT(19)) -#define TIM_BDTR_BK2F (BIT(20) | BIT(21) | BIT(22) | BIT(23)) -#define TIM_BDTR_BK2E BIT(24) /* Break 2 input enable */ -#define TIM_BDTR_BK2P BIT(25) /* Break 2 input polarity */ +#define TIM_BDTR_BKF(x) (0xf << (16 + (x) * 4)) #define TIM_DCR_DBA GENMASK(4, 0) /* DMA base addr */ #define TIM_DCR_DBL GENMASK(12, 8) /* DMA burst len */ @@ -87,8 +84,7 @@ #define TIM_CR2_MMS2_SHIFT 20 #define TIM_SMCR_TS_SHIFT 4 #define TIM_BDTR_BKF_MASK 0xF -#define TIM_BDTR_BKF_SHIFT 16 -#define TIM_BDTR_BK2F_SHIFT 20 +#define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4) enum stm32_timers_dmas { STM32_TIMERS_DMA_CH1, -- cgit v1.2.3 From 598ecfbaa742aca0dcdbbea25681406f95cc0b63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Oct 2019 13:12:15 -0700 Subject: iomap: lift the xfs writeback code to iomap Take the xfs writeback code and move it to fs/iomap. A new structure with three methods is added as the abstraction from the generic writeback code to the file system. These methods are used to map blocks, submit an ioend, and cancel a page that encountered an error before it was added to an ioend. Signed-off-by: Christoph Hellwig [darrick: rename ->submit_ioend to ->prepare_ioend to clarify what it does] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- include/linux/iomap.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4b25ad6b5edd..6e00bb843c7f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -12,6 +13,7 @@ struct address_space; struct fiemap_extent_info; struct inode; +struct iomap_writepage_ctx; struct iov_iter; struct kiocb; struct page; @@ -185,6 +187,63 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset, sector_t iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops); +/* + * Structure for writeback I/O completions. + */ +struct iomap_ioend { + struct list_head io_list; /* next ioend in chain */ + u16 io_type; + u16 io_flags; /* IOMAP_F_* */ + struct inode *io_inode; /* file being written to */ + size_t io_size; /* size of the extent */ + loff_t io_offset; /* offset in the file */ + void *io_private; /* file system private data */ + struct bio *io_bio; /* bio being built */ + struct bio io_inline_bio; /* MUST BE LAST! */ +}; + +struct iomap_writeback_ops { + /* + * Required, maps the blocks so that writeback can be performed on + * the range starting at offset. + */ + int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, + loff_t offset); + + /* + * Optional, allows the file systems to perform actions just before + * submitting the bio and/or override the bio end_io handler for complex + * operations like copy on write extent manipulation or unwritten extent + * conversions. + */ + int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + + /* + * Optional, allows the file system to discard state on a page where + * we failed to submit any I/O. + */ + void (*discard_page)(struct page *page); +}; + +struct iomap_writepage_ctx { + struct iomap iomap; + struct iomap_ioend *ioend; + const struct iomap_writeback_ops *ops; +}; + +void iomap_finish_ioends(struct iomap_ioend *ioend, int error); +void iomap_ioend_try_merge(struct iomap_ioend *ioend, + struct list_head *more_ioends, + void (*merge_private)(struct iomap_ioend *ioend, + struct iomap_ioend *next)); +void iomap_sort_ioends(struct list_head *ioend_list); +int iomap_writepage(struct page *page, struct writeback_control *wbc, + struct iomap_writepage_ctx *wpc, + const struct iomap_writeback_ops *ops); +int iomap_writepages(struct address_space *mapping, + struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, + const struct iomap_writeback_ops *ops); + /* * Flags for direct I/O ->end_io: */ -- cgit v1.2.3 From ab08b01ec0a205d9c98e712eb504c850a51e6fdb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Oct 2019 13:12:19 -0700 Subject: iomap: move struct iomap_page out of iomap.h Now that all the writepage code is in the iomap code there is no need to keep this structure public. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6e00bb843c7f..0f741eebf3a7 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -136,23 +136,6 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, const struct iomap_ops *ops, void *data, iomap_actor_t actor); -/* - * Structure allocate for each page when block size < PAGE_SIZE to track - * sub-page uptodate status and I/O completions. - */ -struct iomap_page { - atomic_t read_count; - atomic_t write_count; - DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); -}; - -static inline struct iomap_page *to_iomap_page(struct page *page) -{ - if (page_has_private(page)) - return (struct iomap_page *)page_private(page); - return NULL; -} - ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); -- cgit v1.2.3 From 65a60e8687c1c8f69aae3e77eafbf4a54b9f99e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:40:17 -0700 Subject: iomap: better document the IOMAP_F_* flags The documentation for IOMAP_F_* is a bit disorganized, and doesn't mention the fact that most flags are set by the file system and consumed by the iomap core, while IOMAP_F_SIZE_CHANGED is set by the core and consumed by the file system. Signed-off-by: Christoph Hellwig Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0f741eebf3a7..8016700e6121 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -30,23 +30,38 @@ struct vm_fault; #define IOMAP_INLINE 0x05 /* data inline in the inode */ /* - * Flags for all iomap mappings: + * Flags reported by the file system from iomap_begin: + * + * IOMAP_F_NEW indicates that the blocks have been newly allocated and need + * zeroing for areas that no data is copied to. * * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access * written data and requires fdatasync to commit them to persistent storage. * This needs to take into account metadata changes that *may* be made at IO * completion, such as file size updates from direct IO. + * + * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be + * unshared as part a write. + * + * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block + * mappings. + * + * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of + * buffer heads for this mapping. */ -#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ -#define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ -#define IOMAP_F_BUFFER_HEAD 0x04 /* file system requires buffer heads */ -#define IOMAP_F_SIZE_CHANGED 0x08 /* file size has changed */ +#define IOMAP_F_NEW 0x01 +#define IOMAP_F_DIRTY 0x02 +#define IOMAP_F_SHARED 0x04 +#define IOMAP_F_MERGED 0x08 +#define IOMAP_F_BUFFER_HEAD 0x10 /* - * Flags that only need to be reported for IOMAP_REPORT requests: + * Flags set by the core iomap code during operations: + * + * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size + * has changed as the result of this write operation. */ -#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ -#define IOMAP_F_SHARED 0x20 /* block shared with another file */ +#define IOMAP_F_SIZE_CHANGED 0x100 /* * Flags from 0x1000 up are for file system specific usage: -- cgit v1.2.3 From 3590c4d8979bcc364e2ded95ab3966b4e436b7bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:41:34 -0700 Subject: iomap: ignore non-shared or non-data blocks in xfs_file_dirty xfs_file_dirty is used to unshare reflink blocks. Rename the function to xfs_file_unshare to better document that purpose, and skip iomaps that are not shared and don't need zeroing. This will allow to simplify the caller. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 8016700e6121..1623851ade90 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -168,7 +168,7 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage, #else #define iomap_migrate_page NULL #endif -int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, +int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); -- cgit v1.2.3 From eb81cf9d0e18d438e27339e0d1a49d3ac8644674 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:43:08 -0700 Subject: iomap: renumber IOMAP_HOLE to 0 Instead of keeping a separate unnamed state for uninitialized iomaps, renumber IOMAP_HOLE to zero so that an uninitialized iomap is treated as a hole. Suggested-by: Darrick J. Wong Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 1623851ade90..53e6e2275d3d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -23,11 +23,11 @@ struct vm_fault; /* * Types of block ranges for iomap mappings: */ -#define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ -#define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ -#define IOMAP_MAPPED 0x03 /* blocks allocated at @addr */ -#define IOMAP_UNWRITTEN 0x04 /* blocks allocated at @addr in unwritten state */ -#define IOMAP_INLINE 0x05 /* data inline in the inode */ +#define IOMAP_HOLE 0 /* no blocks allocated, need allocation */ +#define IOMAP_DELALLOC 1 /* delayed allocation blocks */ +#define IOMAP_MAPPED 2 /* blocks allocated at @addr */ +#define IOMAP_UNWRITTEN 3 /* blocks allocated at @addr in unwritten state */ +#define IOMAP_INLINE 4 /* data inline in the inode */ /* * Flags reported by the file system from iomap_begin: -- cgit v1.2.3 From c039b99792726346ad46ff17c5a5bcb77a5edac4 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 18 Oct 2019 16:44:10 -0700 Subject: iomap: use a srcmap for a read-modify-write I/O The srcmap is used to identify where the read is to be performed from. It is passed to ->iomap_begin, which can fill it in if we need to read data for partially written blocks from a different location than the write target. The srcmap is only supported for buffered writes so far. Signed-off-by: Goldwyn Rodrigues [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as srcmap if not set, adjust length down to srcmap end as well] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Acked-by: Goldwyn Rodrigues --- include/linux/iomap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 53e6e2275d3d..8b09463dae0d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -129,7 +129,8 @@ struct iomap_ops { * The actual length is returned in iomap->length. */ int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, - unsigned flags, struct iomap *iomap); + unsigned flags, struct iomap *iomap, + struct iomap *srcmap); /* * Commit and/or unreserve space previous allocated using iomap_begin. @@ -145,7 +146,7 @@ struct iomap_ops { * Main iomap iterator function. */ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len, - void *data, struct iomap *iomap); + void *data, struct iomap *iomap, struct iomap *srcmap); loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, const struct iomap_ops *ops, void *data, -- cgit v1.2.3 From b48c1a45a190898103cec28771efc399fd65a05a Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:16 +0100 Subject: KVM: arm64: Implement PV_TIME_FEATURES call This provides a mechanism for querying which paravirtualized time features are available in this hypervisor. Also add the header file which defines the ABI for the paravirtualized time features we're about to add. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- include/linux/arm-smccc.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index df01a8579034..92e0046ce7a7 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -45,6 +45,7 @@ #define ARM_SMCCC_OWNER_SIP 2 #define ARM_SMCCC_OWNER_OEM 3 #define ARM_SMCCC_OWNER_STANDARD 4 +#define ARM_SMCCC_OWNER_STANDARD_HYP 5 #define ARM_SMCCC_OWNER_TRUSTED_APP 48 #define ARM_SMCCC_OWNER_TRUSTED_APP_END 49 #define ARM_SMCCC_OWNER_TRUSTED_OS 50 @@ -318,5 +319,18 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define SMCCC_RET_NOT_SUPPORTED -1 #define SMCCC_RET_NOT_REQUIRED -2 +/* Paravirtualised time calls (defined by ARM DEN0057A) */ +#define ARM_SMCCC_HV_PV_TIME_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x20) + +#define ARM_SMCCC_HV_PV_TIME_ST \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD_HYP, \ + 0x21) + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ -- cgit v1.2.3 From cac0f1b7285eaaf9a186c618c3a7304d82ed5493 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:17 +0100 Subject: KVM: Implement kvm_put_guest() kvm_put_guest() is analogous to put_user() - it writes a single value to the guest physical address. The implementation is built upon put_user() and so it has the same single copy atomic properties. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 719fc3e15ea4..9907e45f8875 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -746,6 +746,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); + +#define __kvm_put_guest(kvm, gfn, offset, value, type) \ +({ \ + unsigned long __addr = gfn_to_hva(kvm, gfn); \ + type __user *__uaddr = (type __user *)(__addr + offset); \ + int __ret = -EFAULT; \ + \ + if (!kvm_is_error_hva(__addr)) \ + __ret = put_user(value, __uaddr); \ + if (!__ret) \ + mark_page_dirty(kvm, gfn); \ + __ret; \ +}) + +#define kvm_put_guest(kvm, gpa, value, type) \ +({ \ + gpa_t __gpa = gpa; \ + struct kvm *__kvm = kvm; \ + __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ + offset_in_page(__gpa), (value), type); \ +}) + int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); -- cgit v1.2.3 From 8564d6372a7d8a6d440441b8ed8020f97f744450 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:18 +0100 Subject: KVM: arm64: Support stolen time reporting via shared structure Implement the service call for configuring a shared structure between a VCPU and the hypervisor in which the hypervisor can write the time stolen from the VCPU's execution time by other tasks on the host. User space allocates memory which is placed at an IPA also chosen by user space. The hypervisor then updates the shared structure using kvm_put_guest() to ensure single copy atomicity of the 64-bit value reporting the stolen time in nanoseconds. Whenever stolen time is enabled by the guest, the stolen time counter is reset. The stolen time itself is retrieved from the sched_info structure maintained by the Linux scheduler code. We enable SCHEDSTATS when selecting KVM Kconfig to ensure this value is meaningful. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- include/linux/kvm_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index bde5374ae021..1c88e69db3d9 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -35,6 +35,8 @@ typedef unsigned long gva_t; typedef u64 gpa_t; typedef u64 gfn_t; +#define GPA_INVALID (~(gpa_t)0) + typedef unsigned long hva_t; typedef u64 hpa_t; typedef u64 hfn_t; -- cgit v1.2.3 From 8538cb22bbce5a988671b68baf0b0f9e86ca1e87 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:19 +0100 Subject: KVM: Allow kvm_device_ops to be const Currently a kvm_device_ops structure cannot be const without triggering compiler warnings. However the structure doesn't need to be written to and, by marking it const, it can be read-only in memory. Add some more const keywords to allow this. Reviewed-by: Andrew Jones Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9907e45f8875..7a26d5513471 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1262,7 +1262,7 @@ extern unsigned int halt_poll_ns_grow_start; extern unsigned int halt_poll_ns_shrink; struct kvm_device { - struct kvm_device_ops *ops; + const struct kvm_device_ops *ops; struct kvm *kvm; void *private; struct list_head vm_node; @@ -1315,7 +1315,7 @@ struct kvm_device_ops { void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); -int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); +int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; -- cgit v1.2.3 From 541625ac47ce9d0835efaee0fcbaa251b0000a37 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:21 +0100 Subject: arm/arm64: Provide a wrapper for SMCCC 1.1 calls SMCCC 1.1 calls may use either HVC or SMC depending on the PSCI conduit. Rather than coding this in every call site, provide a macro which uses the correct instruction. The macro also handles the case where no conduit is configured/available returning a not supported error in res, along with returning the conduit used for the call. This allow us to remove some duplicated code and will be useful later when adding paravirtualized time hypervisor calls. Signed-off-by: Steven Price Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- include/linux/arm-smccc.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 92e0046ce7a7..59494df0f55b 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -319,6 +319,51 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define SMCCC_RET_NOT_SUPPORTED -1 #define SMCCC_RET_NOT_REQUIRED -2 +/* + * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED. + * Used when the SMCCC conduit is not defined. The empty asm statement + * avoids compiler warnings about unused variables. + */ +#define __fail_smccc_1_1(...) \ + do { \ + __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ + asm ("" __constraints(__count_args(__VA_ARGS__))); \ + if (___res) \ + ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ + } while (0) + +/* + * arm_smccc_1_1_invoke() - make an SMCCC v1.1 compliant call + * + * This is a variadic macro taking one to eight source arguments, and + * an optional return structure. + * + * @a0-a7: arguments passed in registers 0 to 7 + * @res: result values from registers 0 to 3 + * + * This macro will make either an HVC call or an SMC call depending on the + * current SMCCC conduit. If no valid conduit is available then -1 + * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied). + * + * The return value also provides the conduit that was used. + */ +#define arm_smccc_1_1_invoke(...) ({ \ + int method = arm_smccc_1_1_get_conduit(); \ + switch (method) { \ + case SMCCC_CONDUIT_HVC: \ + arm_smccc_1_1_hvc(__VA_ARGS__); \ + break; \ + case SMCCC_CONDUIT_SMC: \ + arm_smccc_1_1_smc(__VA_ARGS__); \ + break; \ + default: \ + __fail_smccc_1_1(__VA_ARGS__); \ + method = SMCCC_CONDUIT_NONE; \ + break; \ + } \ + method; \ + }) + /* Paravirtualised time calls (defined by ARM DEN0057A) */ #define ARM_SMCCC_HV_PV_TIME_FEATURES \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ -- cgit v1.2.3 From e0685fa228fdaf386f82ac0d64b2d6f3e0ddd588 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Oct 2019 16:28:23 +0100 Subject: arm64: Retrieve stolen time as paravirtualized guest Enable paravirtualization features when running under a hypervisor supporting the PV_TIME_ST hypercall. For each (v)CPU, we ask the hypervisor for the location of a shared page which the hypervisor will use to report stolen time to us. We set pv_time_ops to the stolen time function which simply reads the stolen value from the shared page for a VCPU. We guarantee single-copy atomicity using READ_ONCE which means we can also read the stolen time for another VCPU than the currently running one while it is potentially being updated by the hypervisor. Signed-off-by: Steven Price Signed-off-by: Marc Zyngier --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 068793a619ca..89d75edb5750 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -136,6 +136,7 @@ enum cpuhp_state { /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, + CPUHP_AP_ARM_KVMPV_STARTING, CPUHP_AP_ARM_CORESIGHT_STARTING, CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, -- cgit v1.2.3 From 1565bdad59e97f31cfc7b065bc0fc77e9549e62d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Oct 2019 16:34:17 -0700 Subject: fscrypt: remove struct fscrypt_ctx Now that ext4 and f2fs implement their own post-read workflow that supports both fscrypt and fsverity, the fscrypt-only workflow based around struct fscrypt_ctx is no longer used. So remove the unused code. This is based on a patch from Chandan Rajendra's "Consolidate FS read I/O callbacks code" patchset, but rebased onto the latest kernel, folded __fscrypt_decrypt_bio() into fscrypt_decrypt_bio(), cleaned up fscrypt_initialize(), and updated the commit message. Originally-from: Chandan Rajendra Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index f622f7460ed8..04f5ed628445 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FS_CRYPTO_BLOCK_SIZE 16 -struct fscrypt_ctx; struct fscrypt_info; struct fscrypt_str { @@ -64,18 +63,6 @@ struct fscrypt_operations { unsigned int max_namelen; }; -/* Decryption work */ -struct fscrypt_ctx { - union { - struct { - struct bio *bio; - struct work_struct work; - }; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - static inline bool fscrypt_has_encryption_key(const struct inode *inode) { /* pairs with cmpxchg_release() in fscrypt_get_encryption_info() */ @@ -102,8 +89,6 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) /* crypto.c */ extern void fscrypt_enqueue_decrypt_work(struct work_struct *); -extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, @@ -244,8 +229,6 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); -extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); @@ -295,16 +278,6 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } -static inline struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - return; -} - static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, @@ -484,11 +457,6 @@ static inline void fscrypt_decrypt_bio(struct bio *bio) { } -static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio) -{ -} - static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { -- cgit v1.2.3 From a362687404edc5d73a4fc281af3b2b1542ef194e Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Mon, 21 Oct 2019 18:52:24 -0700 Subject: soc: mmp: guard include of asm/cputype.h with CONFIG_ARM{,64} Since this driver is enabled for COMPILE_TEST, it avoids build error on x86 allmodconfig: In file included from /build/drivers/phy/marvell/phy-mmp3-usb.c:12: /build/include/linux/soc/mmp/cputype.h:5:10: fatal error: asm/cputype.h: No such file or directory Link: https://lore.kernel.org/r/20191022015658.14624-1-olof@lixom.net Signed-off-by: Olof Johansson --- include/linux/soc/mmp/cputype.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mmp/cputype.h b/include/linux/soc/mmp/cputype.h index c3ec88983e94..221790761e8e 100644 --- a/include/linux/soc/mmp/cputype.h +++ b/include/linux/soc/mmp/cputype.h @@ -2,7 +2,9 @@ #ifndef __ASM_MACH_CPUTYPE_H #define __ASM_MACH_CPUTYPE_H +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) #include +#endif /* * CPU Stepping CPU_ID CHIP_ID -- cgit v1.2.3 From 771fdcf8d3d04e77ae0f0dc1018144206a61d216 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 16 Oct 2019 16:57:53 +0200 Subject: PM / OPP: Support adjusting OPP voltages at runtime On some SoCs the Adaptive Voltage Scaling (AVS) technique is employed to optimize the operating voltage of a device. At a given frequency, the hardware monitors dynamic factors and either makes a suggestion for how much to adjust a voltage for the current frequency, or it automatically adjusts the voltage without software intervention. Add an API to the OPP library for the former case, so that AVS type devices can update the voltages for an OPP when the hardware determines the voltage should change. The assumption is that drivers like CPUfreq or devfreq will register for the OPP notifiers and adjust the voltage according to suggestions that AVS makes. This patch is derived from [1] submitted by Stephen. [1] https://lore.kernel.org/patchwork/patch/599279/ Signed-off-by: Stephen Boyd Signed-off-by: Roger Lu [s.nawrocki@samsung.com: added handling of OPP min/max voltage] Signed-off-by: Sylwester Nawrocki Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index b8197ab014f2..747861816f4f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -22,6 +22,7 @@ struct opp_table; enum dev_pm_opp_event { OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE, + OPP_EVENT_ADJUST_VOLTAGE, }; /** @@ -113,6 +114,10 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); +int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max); + int dev_pm_opp_enable(struct device *dev, unsigned long freq); int dev_pm_opp_disable(struct device *dev, unsigned long freq); @@ -242,6 +247,14 @@ static inline void dev_pm_opp_remove_all_dynamic(struct device *dev) { } +static inline int +dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max) +{ + return 0; +} + static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq) { return 0; -- cgit v1.2.3 From 78958563d8023db0c6d03a2fe2a64d79b47b4349 Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 21 Oct 2019 16:30:22 -0700 Subject: KVM: x86: Remove unneeded kvm_vcpu variable, guest_xcr0_loaded The kvm_vcpu variable, guest_xcr0_loaded, is a waste of an 'int' and a conditional branch. VMX and SVM are the only users, and both unconditionally pair kvm_load_guest_xcr0() with kvm_put_guest_xcr0() making this check unnecessary. Without this variable, the predicates in kvm_load_guest_xcr0 and kvm_put_guest_xcr0 should match. Suggested-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Aaron Lewis Change-Id: I7b1eb9b62969d7bbb2850f27e42f863421641b23 Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 719fc3e15ea4..d2017302996c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -278,7 +278,6 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; -- cgit v1.2.3 From 149487bdacde32f5a9a344a49533ae0772fb9db7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 21 Oct 2019 15:58:42 -0700 Subject: KVM: Add separate helper for putting borrowed reference to kvm Add a new helper, kvm_put_kvm_no_destroy(), to handle putting a borrowed reference[*] to the VM when installing a new file descriptor fails. KVM expects the refcount to remain valid in this case, as the in-progress ioctl() has an explicit reference to the VM. The primary motiviation for the helper is to document that the 'kvm' pointer is still valid after putting the borrowed reference, e.g. to document that doing mutex(&kvm->lock) immediately after putting a ref to kvm isn't broken. [*] When exposing a new object to userspace via a file descriptor, e.g. a new vcpu, KVM grabs a reference to itself (the VM) prior to making the object visible to userspace to avoid prematurely freeing the VM in the scenario where userspace immediately closes file descriptor. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d2017302996c..a817e446c9aa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -621,6 +621,7 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); +void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { -- cgit v1.2.3 From cbb79863fc3175ed5ac506465948b02a893a8235 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 14 Oct 2019 10:35:56 -0500 Subject: ipmi: Don't allow device module unload when in use If something has the IPMI driver open, don't allow the device module to be unloaded. Before it would unload and the user would get errors on use. This change is made on user request, and it makes it consistent with the I2C driver, which has the same behavior. It does change things a little bit with respect to kernel users. If the ACPI or IPMI watchdog (or any other kernel user) has created a user, then the device module cannot be unloaded. Before it could be unloaded, This does not affect hot-plug. If the device goes away (it's on something removable that is removed or is hot-removed via sysfs) then it still behaves as it did before. Reported-by: tony camuso Signed-off-by: Corey Minyard Tested-by: tony camuso --- include/linux/ipmi_smi.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 4dc66157d872..deec18b8944a 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -224,10 +224,14 @@ static inline int ipmi_demangle_device_id(uint8_t netfn, uint8_t cmd, * is called, and the lower layer must get the interface from that * call. */ -int ipmi_register_smi(const struct ipmi_smi_handlers *handlers, - void *send_info, - struct device *dev, - unsigned char slave_addr); +int ipmi_add_smi(struct module *owner, + const struct ipmi_smi_handlers *handlers, + void *send_info, + struct device *dev, + unsigned char slave_addr); + +#define ipmi_register_smi(handlers, send_info, dev, slave_addr) \ + ipmi_add_smi(THIS_MODULE, handlers, send_info, dev, slave_addr) /* * Remove a low-level interface from the IPMI driver. This will -- cgit v1.2.3 From 4b97ba73dcdc24fd968cbeb970ae57212e2c1c73 Mon Sep 17 00:00:00 2001 From: Jethro Beekman Date: Wed, 4 Sep 2019 01:15:24 +0000 Subject: mtd: spi-nor: intel-spi: add support for Intel Cannon Lake SPI flash Now that SPI flash controllers without a software sequencer are supported, it's trivial to add support for CNL and its PCI ID. Values from https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/300-series-chipset-pch-datasheet-vol-2.pdf Signed-off-by: Jethro Beekman Reviewed-by: Mika Westerberg Signed-off-by: Tudor Ambarus --- include/linux/platform_data/intel-spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h index ebb4f332588b..7f53a5c6f35e 100644 --- a/include/linux/platform_data/intel-spi.h +++ b/include/linux/platform_data/intel-spi.h @@ -13,6 +13,7 @@ enum intel_spi_type { INTEL_SPI_BYT = 1, INTEL_SPI_LPT, INTEL_SPI_BXT, + INTEL_SPI_CNL, }; /** -- cgit v1.2.3 From 45397787536434648495f7b02a7e669ab8ae12f3 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 24 Sep 2019 07:45:53 +0000 Subject: mtd: spi-nor: Introduce 'struct spi_nor_controller_ops' Move all SPI NOR controller driver specific ops in a dedicated structure. 'struct spi_nor' becomes lighter. Use size_t for lengths in 'int (*write_reg)()' and 'int (*read_reg)()'. Rename wite/read_buf to buf, the name of the functions are suggestive enough. Constify buf in int (*write_reg). Comply with these changes in the SPI NOR controller drivers. Suggested-by: Boris Brezillon Signed-off-by: Tudor Ambarus Reviewed-by: Boris Brezillon --- include/linux/mtd/spi-nor.h | 51 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index fc0b4b19c900..d1d736d3c8ab 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -465,6 +465,34 @@ enum spi_nor_pp_command_index { /* Forward declaration that will be used in 'struct spi_nor_flash_parameter' */ struct spi_nor; +/** + * struct spi_nor_controller_ops - SPI NOR controller driver specific + * operations. + * @prepare: [OPTIONAL] do some preparations for the + * read/write/erase/lock/unlock operations. + * @unprepare: [OPTIONAL] do some post work after the + * read/write/erase/lock/unlock operations. + * @read_reg: read out the register. + * @write_reg: write data to the register. + * @read: read data from the SPI NOR. + * @write: write data to the SPI NOR. + * @erase: erase a sector of the SPI NOR at the offset @offs; if + * not provided by the driver, spi-nor will send the erase + * opcode via write_reg(). + */ +struct spi_nor_controller_ops { + int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops); + void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops); + int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, size_t len); + int (*write_reg)(struct spi_nor *nor, u8 opcode, const u8 *buf, + size_t len); + + ssize_t (*read)(struct spi_nor *nor, loff_t from, size_t len, u8 *buf); + ssize_t (*write)(struct spi_nor *nor, loff_t to, size_t len, + const u8 *buf); + int (*erase)(struct spi_nor *nor, loff_t offs); +}; + /** * struct spi_nor_locking_ops - SPI NOR locking methods * @lock: lock a region of the SPI NOR. @@ -549,17 +577,7 @@ struct flash_info; * @read_proto: the SPI protocol for read operations * @write_proto: the SPI protocol for write operations * @reg_proto the SPI protocol for read_reg/write_reg/erase operations - * @prepare: [OPTIONAL] do some preparations for the - * read/write/erase/lock/unlock operations - * @unprepare: [OPTIONAL] do some post work after the - * read/write/erase/lock/unlock operations - * @read_reg: [DRIVER-SPECIFIC] read out the register - * @write_reg: [DRIVER-SPECIFIC] write data to the register - * @read: [DRIVER-SPECIFIC] read data from the SPI NOR - * @write: [DRIVER-SPECIFIC] write data to the SPI NOR - * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR - * at the offset @offs; if not provided by the driver, - * spi-nor will send the erase opcode via write_reg() + * @controller_ops: SPI NOR controller driver specific operations. * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from * the SPI NOR Status Register. * @params: [FLASH-SPECIFIC] SPI-NOR flash parameters and settings. @@ -588,16 +606,7 @@ struct spi_nor { bool sst_write_second; u32 flags; - int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops); - void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops); - int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); - int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); - - ssize_t (*read)(struct spi_nor *nor, loff_t from, - size_t len, u_char *read_buf); - ssize_t (*write)(struct spi_nor *nor, loff_t to, - size_t len, const u_char *write_buf); - int (*erase)(struct spi_nor *nor, loff_t offs); + const struct spi_nor_controller_ops *controller_ops; int (*clear_sr_bp)(struct spi_nor *nor); struct spi_nor_flash_parameter params; -- cgit v1.2.3 From 90767cdfea89c3ac7012035d66633b9ba839091a Mon Sep 17 00:00:00 2001 From: Nagarjuna Kristam Date: Fri, 18 Oct 2019 15:08:08 +0530 Subject: phy: tegra: xusb: Add vbus override support on Tegra210 Tegra XUSB device control driver needs to control vbus override during its operations, add API for the support. Signed-off-by: Nagarjuna Kristam Acked-by: Thierry Reding Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/tegra/xusb.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index ee59562c8354..1235865e7e2c 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -18,5 +18,7 @@ int tegra_xusb_padctl_hsic_set_idle(struct tegra_xusb_padctl *padctl, unsigned int port, bool idle); int tegra_xusb_padctl_usb3_set_lfps_detect(struct tegra_xusb_padctl *padctl, unsigned int port, bool enable); - +int tegra_xusb_padctl_set_vbus_override(struct tegra_xusb_padctl *padctl, + bool val); +int tegra_phy_xusb_utmi_port_reset(struct phy *phy); #endif /* PHY_TEGRA_XUSB_H */ -- cgit v1.2.3 From 2952db0fd51b0890f728df94ac563c21407f4f43 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 11 Sep 2018 16:55:03 +0200 Subject: compat_ioctl: add compat_ptr_ioctl() Many drivers have ioctl() handlers that are completely compatible between 32-bit and 64-bit architectures, except for the argument that is passed down from user space and may have to be passed through compat_ptr() in order to become a valid 64-bit pointer. Using ".compat_ptr = compat_ptr_ioctl" in file operations should let us simplify a lot of those drivers to avoid #ifdef checks, and convert additional drivers that don't have proper compat handling yet. On most architectures, the compat_ptr_ioctl() just passes all arguments to the corresponding ->ioctl handler. The exception is arch/s390, where compat_ptr() clears the top bit of a 32-bit pointer value, so user space pointers to the second 2GB alias the first 2GB, as is the case for native 32-bit s390 user space. The compat_ptr_ioctl() function must therefore be used only with ioctl functions that either ignore the argument or pass a pointer to a compatible data type. If any ioctl command handled by fops->unlocked_ioctl passes a plain integer instead of a pointer, or any of the passed data types is incompatible between 32-bit and 64-bit architectures, a proper handler is required instead of compat_ptr_ioctl. Signed-off-by: Arnd Bergmann --- v3: add a better description v2: use compat_ptr_ioctl instead of generic_compat_ioctl_ptrarg, as suggested by Al Viro --- include/linux/fs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..0b4d8fc79e0f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,6 +1727,13 @@ int vfs_mkobj(struct dentry *, umode_t, extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +#ifdef CONFIG_COMPAT +extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +#else +#define compat_ptr_ioctl NULL +#endif + /* * VFS file helper functions. */ -- cgit v1.2.3 From 011da44bc5b6520d00b42c584a4fefc85f7b332b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Apr 2019 19:24:03 -0400 Subject: compat: move FS_IOC_RESVSP_32 handling to fs/ioctl.c ... and lose the ridiculous games with compat_alloc_user_space() there. Signed-off-by: Al Viro Signed-off-by: Arnd Bergmann --- include/linux/falloc.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 674d59f4d6ce..fc61fdb9d1e9 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -29,4 +29,24 @@ struct space_resv { FALLOC_FL_INSERT_RANGE | \ FALLOC_FL_UNSHARE_RANGE) +/* on ia32 l_start is on a 32-bit boundary */ +#if defined(CONFIG_X86_64) +struct space_resv_32 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +}; + +#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32) +#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) + +int compat_ioctl_preallocate(struct file *, struct space_resv_32 __user *); + +#endif + #endif /* _FALLOC_H_ */ -- cgit v1.2.3 From 1207045da5a7c94344e0ea9a9e7495985eef499a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 7 Sep 2018 16:49:43 +0200 Subject: compat_ioctl: move tape handling into drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MTIOCPOS and MTIOCGET are incompatible between 32-bit and 64-bit user space, and traditionally have been translated in fs/compat_ioctl.c. To get rid of that translation handler, move a corresponding implementation into each of the four drivers implementing those commands. The interesting part of that is now in a new linux/mtio.h header that wraps the existing uapi/linux/mtio.h header and provides an abstraction to let drivers handle both cases easily. Using an in_compat_syscall() check, the caller does not have to keep track of whether this was called through .unlocked_ioctl() or .compat_ioctl(). Acked-by: Heiko Carstens Cc: "Kai Mäkisara" Cc: linux-scsi@vger.kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: "David S. Miller" Signed-off-by: Arnd Bergmann --- include/linux/mtio.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 include/linux/mtio.h (limited to 'include/linux') diff --git a/include/linux/mtio.h b/include/linux/mtio.h new file mode 100644 index 000000000000..67d03156f2c2 --- /dev/null +++ b/include/linux/mtio.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MTIO_COMPAT_H +#define _LINUX_MTIO_COMPAT_H + +#include +#include +#include + +/* + * helper functions for implementing compat ioctls on the four tape + * drivers: we define the 32-bit layout of each incompatible structure, + * plus a wrapper function to copy it to user space in either format. + */ + +struct mtget32 { + s32 mt_type; + s32 mt_resid; + s32 mt_dsreg; + s32 mt_gstat; + s32 mt_erreg; + s32 mt_fileno; + s32 mt_blkno; +}; +#define MTIOCGET32 _IOR('m', 2, struct mtget32) + +struct mtpos32 { + s32 mt_blkno; +}; +#define MTIOCPOS32 _IOR('m', 3, struct mtpos32) + +static inline int put_user_mtget(void __user *u, struct mtget *k) +{ + struct mtget32 k32 = { + .mt_type = k->mt_type, + .mt_resid = k->mt_resid, + .mt_dsreg = k->mt_dsreg, + .mt_gstat = k->mt_gstat, + .mt_erreg = k->mt_erreg, + .mt_fileno = k->mt_fileno, + .mt_blkno = k->mt_blkno, + }; + int ret; + + if (in_compat_syscall()) + ret = copy_to_user(u, &k32, sizeof(k32)); + else + ret = copy_to_user(u, k, sizeof(*k)); + + return ret ? -EFAULT : 0; +} + +static inline int put_user_mtpos(void __user *u, struct mtpos *k) +{ + if (in_compat_syscall()) + return put_user(k->mt_blkno, (u32 __user *)u); + else + return put_user(k->mt_blkno, (long __user *)u); +} + +#endif -- cgit v1.2.3 From 98aaaec4a150c39219a8aaa68c3adc6eed443ea8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2019 17:45:18 +0100 Subject: compat_ioctl: reimplement SG_IO handling There are two code locations that implement the SG_IO ioctl: the old sg.c driver, and the generic scsi_ioctl helper that is in turn used by multiple drivers. To eradicate the old compat_ioctl conversion handler for the SG_IO command, I implement a readable pair of put_sg_io_hdr() /get_sg_io_hdr() helper functions that can be used for both compat and native mode, and then I call this from both drivers. For the iovec handling, there is already a compat_import_iovec() function that can simply be called in place of import_iovec(). To avoid having to pass the compat/native state through multiple indirections, I mark the SG_IO command itself as compatible in fs/compat_ioctl.c and use in_compat_syscall() to figure out where we are called from. As a side-effect of this, the sg.c driver now also accepts the 32-bit sg_io_hdr format in compat mode using the read/write interface, not just ioctl. This should improve compatiblity with old 32-bit binaries, but it would break if any application intentionally passes the 64-bit data structure in compat mode here. Steffen Maier helped debug an issue in an earlier version of this patch. Cc: Steffen Maier Cc: linux-scsi@vger.kernel.org Cc: Doug Gilbert Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Arnd Bergmann --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3ea78b0c91c..2c8cd22b176b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -870,6 +870,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp); +extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp); extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); -- cgit v1.2.3 From ce4dd4429b3c7e4506870796f3b8b06d707d2928 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Oct 2019 15:13:41 +0100 Subject: Remove the nr_exclusive argument from __wake_up_sync_key() Remove the nr_exclusive argument from __wake_up_sync_key() and derived functions as everything seems to set it to 1. Note also that if it wasn't set to 1, it would clear WF_SYNC anyway. Signed-off-by: David Howells Acked-by: Peter Zijlstra (Intel) --- include/linux/wait.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 3eb7cae8206c..bb7676d396cd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -201,9 +201,9 @@ void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key, wait_queue_entry_t *bookmark); -void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); -void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -214,7 +214,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) -#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) /* * Wakeup macros to be used to report events to the targets. @@ -228,7 +228,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); #define wake_up_interruptible_poll(x, m) \ __wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m)) #define wake_up_interruptible_sync_poll(x, m) \ - __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m)) + __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) #define ___wait_cond_timeout(condition) \ ({ \ -- cgit v1.2.3 From a3470c1829c0c856a19c10af58f8e7792ae27d7a Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 23 Oct 2019 10:00:46 +0300 Subject: spi: document CS setup, hold & inactive times in header This change documents the CS setup, host & inactive times. They were omitted when the fields were added, and were caught by one of the build bots. Fixes: 25093bdeb6bc ("spi: implement SW control for CS times") Reported-by: kbuild test robot Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20191023070046.12478-1-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index c40d6af2bf07..98fe8663033a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -407,6 +407,11 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * controller has native support for memory like operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller + * @cs_setup: delay to be introduced by the controller after CS is asserted + * @cs_hold: delay to be introduced by the controller before CS is deasserted + * @cs_inactive: delay to be introduced by the controller after CS is + * deasserted. If @cs_change_delay is used from @spi_transfer, then the + * two delays will be added up. * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per * CS number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods -- cgit v1.2.3 From fa6e98cee558622565c97924e922b97340aeabd8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 22 Oct 2019 11:31:07 -0700 Subject: net: phy: add support for clause 37 auto-negotiation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for clause 37 1000Base-X auto-negotiation. Signed-off-by: Heiner Kallweit Signed-off-by: Tao Ren Tested-by: René van Dorst Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9a0e981df502..78436d58ce7c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1106,6 +1106,10 @@ int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad, int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, u16 regnum, u16 val); +/* Clause 37 */ +int genphy_c37_config_aneg(struct phy_device *phydev); +int genphy_c37_read_status(struct phy_device *phydev); + /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart); -- cgit v1.2.3 From b9bcb95315febd09419ab870ddc7cb98a393f9d0 Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Tue, 22 Oct 2019 11:31:08 -0700 Subject: net: phy: broadcom: add 1000Base-X support for BCM54616S The BCM54616S PHY cannot work properly in RGMII->1000Base-X mode, mainly because genphy functions are designed for copper links, and 1000Base-X (clause 37) auto negotiation needs to be handled differently. This patch enables 1000Base-X support for BCM54616S by customizing 3 driver callbacks, and it's verified to be working on Facebook CMM BMC platform (RGMII->1000Base-KX): - probe: probe callback detects PHY's operation mode based on INTERF_SEL[1:0] pins and 1000X/100FX selection bit in SerDES 100-FX Control register. - config_aneg: calls genphy_c37_config_aneg when the PHY is running in 1000Base-X mode; otherwise, genphy_config_aneg will be called. - read_status: calls genphy_c37_read_status when the PHY is running in 1000Base-X mode; otherwise, genphy_read_status will be called. Note: BCM54616S PHY can also be configured in RGMII->100Base-FX mode, and 100Base-FX support is not available as of now. Signed-off-by: Tao Ren Acked-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6db2d9a6e503..b475e7f20d28 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -200,9 +200,15 @@ #define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ #define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ #define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ -#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ -#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ +/* 10011: SerDes 100-FX Control Register */ +#define BCM54616S_SHD_100FX_CTRL 0x13 +#define BCM54616S_100FX_MODE BIT(0) /* 100-FX SerDes Enable */ + +/* 11111: Mode Control Register */ +#define BCM54XX_SHD_MODE 0x1f +#define BCM54XX_SHD_INTF_SEL_MASK GENMASK(2, 1) /* INTERF_SEL[1:0] */ +#define BCM54XX_SHD_MODE_1000BX BIT(0) /* Enable 1000-X registers */ /* * EXPANSION SHADOW ACCESS REGISTERS. (PHY REG 0x15, 0x16, and 0x17) -- cgit v1.2.3 From 15fd552d186cb0df34b9d36a07dd6677c4da56bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 3 Jul 2018 16:42:26 +0200 Subject: dma-buf: change DMA-buf locking convention v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is a stripped down version of the locking changes necessary to support dynamic DMA-buf handling. It adds a dynamic flag for both importers as well as exporters so that drivers can choose if they want the reservation object locked or unlocked during mapping of attachments. For compatibility between drivers we cache the DMA-buf mapping during attaching an importer as soon as exporter/importer disagree on the dynamic handling. Issues and solutions we considered: - We can't change all existing drivers, and existing improters have strong opinions about which locks they're holding while calling dma_buf_attachment_map/unmap. Exporters also have strong opinions about which locks they can acquire in their ->map/unmap callbacks, levaing no room for change. The solution to avoid this was to move the actual map/unmap out from this call, into the attach/detach callbacks, and cache the mapping. This works because drivers don't call attach/detach from deep within their code callchains (like deep in memory management code called from cs/execbuf ioctl), but directly from the fd2handle implementation. - The caching has some troubles on some soc drivers, which set other modes than DMA_BIDIRECTIONAL. We can't have 2 incompatible mappings, and we can't re-create the mapping at _map time due to the above locking fun. We very carefuly step around that by only caching at attach time if the dynamic mode between importer/expoert mismatches. - There's been quite some discussion on dma-buf mappings which need active cache management, which would all break down when caching, plus we don't have explicit flush operations on the attachment side. The solution to this was to shrug and keep the current discrepancy between what the dma-buf docs claim and what implementations do, with the hope that the begin/end_cpu_access hooks are good enough and that all necessary flushing to keep device mappings consistent will be done there. v2: cleanup set_name merge, improve kerneldoc v3: update commit message, kerneldoc and cleanup _debug_show() Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/336788/ --- include/linux/dma-buf.h | 63 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index ec212cb27fdc..af73f835c51c 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -42,6 +42,18 @@ struct dma_buf_ops { */ bool cache_sgt_mapping; + /** + * @dynamic_mapping: + * + * If true the framework makes sure that the map/unmap_dma_buf + * callbacks are always called with the dma_resv object locked. + * + * If false the framework makes sure that the map/unmap_dma_buf + * callbacks are always called without the dma_resv object locked. + * Mutual exclusive with @cache_sgt_mapping. + */ + bool dynamic_mapping; + /** * @attach: * @@ -109,6 +121,9 @@ struct dma_buf_ops { * any other kind of sharing that the exporter might wish to make * available to buffer-users. * + * This is always called with the dmabuf->resv object locked when + * the dynamic_mapping flag is true. + * * Returns: * * A &sg_table scatter list of or the backing storage of the DMA buffer, @@ -267,14 +282,16 @@ struct dma_buf_ops { * struct dma_buf - shared buffer object * @size: size of the buffer * @file: file pointer used for sharing buffers across, and for refcounting. - * @attachments: list of dma_buf_attachment that denotes all devices attached. + * @attachments: list of dma_buf_attachment that denotes all devices attached, + * protected by dma_resv lock. * @ops: dma_buf_ops associated with this buffer object. * @lock: used internally to serialize list manipulation, attach/detach and - * vmap/unmap, and accesses to name + * vmap/unmap * @vmapping_counter: used internally to refcnt the vmaps * @vmap_ptr: the current vmap ptr if vmapping_counter > 0 * @exp_name: name of the exporter; useful for debugging. - * @name: userspace-provided name; useful for accounting and debugging. + * @name: userspace-provided name; useful for accounting and debugging, + * protected by @resv. * @owner: pointer to exporter module; used for refcounting when exporter is a * kernel module. * @list_node: node for dma_buf accounting and debugging. @@ -323,10 +340,12 @@ struct dma_buf { * struct dma_buf_attachment - holds device-buffer attachment data * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. - * @node: list of dma_buf_attachment. + * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf. * @sgt: cached mapping. * @dir: direction of cached mapping. * @priv: exporter specific attachment data. + * @dynamic_mapping: true if dma_buf_map/unmap_attachment() is called with the + * dma_resv lock held. * * This structure holds the attachment information between the dma_buf buffer * and its user device(s). The list contains one attachment struct per device @@ -343,6 +362,7 @@ struct dma_buf_attachment { struct list_head node; struct sg_table *sgt; enum dma_data_direction dir; + bool dynamic_mapping; void *priv; }; @@ -394,10 +414,40 @@ static inline void get_dma_buf(struct dma_buf *dmabuf) get_file(dmabuf->file); } +/** + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings. + * @dmabuf: the DMA-buf to check + * + * Returns true if a DMA-buf exporter wants to be called with the dma_resv + * locked for the map/unmap callbacks, false if it doesn't wants to be called + * with the lock held. + */ +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) +{ + return dmabuf->ops->dynamic_mapping; +} + +/** + * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic + * mappinsg + * @attach: the DMA-buf attachment to check + * + * Returns true if a DMA-buf importer wants to call the map/unmap functions with + * the dma_resv lock held. + */ +static inline bool +dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach) +{ + return attach->dynamic_mapping; +} + struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, - struct device *dev); + struct device *dev); +struct dma_buf_attachment * +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev, + bool dynamic_mapping); void dma_buf_detach(struct dma_buf *dmabuf, - struct dma_buf_attachment *dmabuf_attach); + struct dma_buf_attachment *attach); struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info); @@ -409,6 +459,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *, enum dma_data_direction); void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *, enum dma_data_direction); +void dma_buf_move_notify(struct dma_buf *dma_buf); int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, -- cgit v1.2.3 From c512995ce9dc653111d782b130e600802da52b5f Mon Sep 17 00:00:00 2001 From: Josef Friedl Date: Tue, 10 Sep 2019 09:04:41 +0200 Subject: rtc: mt6397: move some common definitions into rtc.h move code to separate header-file to reuse definitions later in poweroff-driver (drivers/power/reset/mt6323-poweroff.c) Suggested-by: Frank Wunderlich Signed-off-by: Josef Friedl Signed-off-by: Frank Wunderlich Acked-by: Alexandre Belloni Signed-off-by: Lee Jones --- include/linux/mfd/mt6397/rtc.h | 71 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 include/linux/mfd/mt6397/rtc.h (limited to 'include/linux') diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h new file mode 100644 index 000000000000..f84b9163c0ee --- /dev/null +++ b/include/linux/mfd/mt6397/rtc.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2014-2019 MediaTek Inc. + * + * Author: Tianping.Fang + * Sean Wang + */ + +#ifndef _LINUX_MFD_MT6397_RTC_H_ +#define _LINUX_MFD_MT6397_RTC_H_ + +#include +#include +#include +#include + +#define RTC_BBPU 0x0000 +#define RTC_BBPU_CBUSY BIT(6) +#define RTC_BBPU_KEY (0x43 << 8) + +#define RTC_WRTGR 0x003c + +#define RTC_IRQ_STA 0x0002 +#define RTC_IRQ_STA_AL BIT(0) +#define RTC_IRQ_STA_LP BIT(3) + +#define RTC_IRQ_EN 0x0004 +#define RTC_IRQ_EN_AL BIT(0) +#define RTC_IRQ_EN_ONESHOT BIT(2) +#define RTC_IRQ_EN_LP BIT(3) +#define RTC_IRQ_EN_ONESHOT_AL (RTC_IRQ_EN_ONESHOT | RTC_IRQ_EN_AL) + +#define RTC_AL_MASK 0x0008 +#define RTC_AL_MASK_DOW BIT(4) + +#define RTC_TC_SEC 0x000a +/* Min, Hour, Dom... register offset to RTC_TC_SEC */ +#define RTC_OFFSET_SEC 0 +#define RTC_OFFSET_MIN 1 +#define RTC_OFFSET_HOUR 2 +#define RTC_OFFSET_DOM 3 +#define RTC_OFFSET_DOW 4 +#define RTC_OFFSET_MTH 5 +#define RTC_OFFSET_YEAR 6 +#define RTC_OFFSET_COUNT 7 + +#define RTC_AL_SEC 0x0018 + +#define RTC_PDN2 0x002e +#define RTC_PDN2_PWRON_ALARM BIT(4) + +#define RTC_MIN_YEAR 1968 +#define RTC_BASE_YEAR 1900 +#define RTC_NUM_YEARS 128 +#define RTC_MIN_YEAR_OFFSET (RTC_MIN_YEAR - RTC_BASE_YEAR) + +#define MTK_RTC_POLL_DELAY_US 10 +#define MTK_RTC_POLL_TIMEOUT (jiffies_to_usecs(HZ)) + +struct mt6397_rtc { + struct device *dev; + struct rtc_device *rtc_dev; + + /* Protect register access from multiple tasks */ + struct mutex lock; + struct regmap *regmap; + int irq; + u32 addr_base; +}; + +#endif /* _LINUX_MFD_MT6397_RTC_H_ */ -- cgit v1.2.3 From a48108c0c20f02485b8cc3ca83652a55a0f5e47f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 22 Oct 2019 16:51:37 +0200 Subject: reset: improve of_xlate documentation Mention of_reset_simple_xlate as the default if of_xlate is not set. Signed-off-by: Philipp Zabel --- include/linux/reset-controller.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 9326d671b6e6..8d35753d419e 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -62,7 +62,8 @@ struct reset_control_lookup { * @of_node: corresponding device tree node as phandle target * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the - * device tree to id as given to the reset control ops + * device tree to id as given to the reset control ops, defaults + * to :c:func:`of_reset_simple_xlate`. * @nr_resets: number of reset controls in this reset controller device */ struct reset_controller_dev { -- cgit v1.2.3 From c2ffa00ad6152ad54940f942fc316b9c83d5e6f9 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 22 Oct 2019 16:53:25 +0200 Subject: reset: document (devm_)reset_control_get_optional variants Add kerneldoc comments for the optional reset_control_get variants. Signed-off-by: Philipp Zabel --- include/linux/reset.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index e7793fc0fa93..bf7c7f188406 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -203,12 +203,34 @@ static inline struct reset_control *reset_control_get_shared( return __reset_control_get(dev, id, 0, true, false, false); } +/** + * reset_control_get_optional_exclusive - optional reset_control_get_exclusive() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Optional variant of reset_control_get_exclusive(). If the requested reset + * is not specified in the device tree, this function returns NULL instead of + * an error. + * + * See reset_control_get_exclusive() for more information. + */ static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { return __reset_control_get(dev, id, 0, false, true, true); } +/** + * reset_control_get_optional_shared - optional reset_control_get_shared() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Optional variant of reset_control_get_shared(). If the requested reset + * is not specified in the device tree, this function returns NULL instead of + * an error. + * + * See reset_control_get_shared() for more information. + */ static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { @@ -354,12 +376,36 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, true, false, false); } +/** + * devm_reset_control_get_optional_exclusive - resource managed + * reset_control_get_optional_exclusive() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_exclusive(). For reset controllers + * returned from this function, reset_control_put() is called automatically on + * driver detach. + * + * See reset_control_get_optional_exclusive() for more information. + */ static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { return __devm_reset_control_get(dev, id, 0, false, true, true); } +/** + * devm_reset_control_get_optional_shared - resource managed + * reset_control_get_optional_shared() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_shared(). For reset controllers returned + * from this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_optional_shared() for more information. + */ static inline struct reset_control *devm_reset_control_get_optional_shared( struct device *dev, const char *id) { -- cgit v1.2.3 From b84dfe1af54053e0aedb14cd26307859aa1df35f Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Wed, 23 Oct 2019 12:40:14 +0000 Subject: i2c: add support for filters optional properties i2c-digital-filter-width-ns: This optional timing property specifies the width of the spikes on the i2c lines (in ns) that can be filtered out by built-in digital filters which are embedded in some i2c controllers. i2c-analog-filter-cutoff-frequency: This optional timing property specifies the cutoff frequency of a low-pass analog filter built-in i2c controllers. This low pass filter is used to filter out high frequency noise on the i2c lines. Specified in Hz. Include these properties in the timings structure and read them as integers. Signed-off-by: Eugen Hristev Acked-by: Ludovic Desroches Reviewed-by: Peter Rosin Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1361637c369d..aaf57d9b41db 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -575,6 +575,10 @@ struct i2c_lock_operations { * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification * @sda_hold_ns: time IP core additionally needs to hold SDA in ns + * @digital_filter_width_ns: width in ns of spikes on i2c lines that the IP core + * digital filter can filter out + * @analog_filter_cutoff_freq_hz: threshold frequency for the low pass IP core + * analog filter */ struct i2c_timings { u32 bus_freq_hz; @@ -583,6 +587,8 @@ struct i2c_timings { u32 scl_int_delay_ns; u32 sda_fall_ns; u32 sda_hold_ns; + u32 digital_filter_width_ns; + u32 analog_filter_cutoff_freq_hz; }; /** -- cgit v1.2.3 From 26ed19adbab16410460bd8b90ccc7430229a0b4a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 18 Jun 2019 01:21:23 +0900 Subject: libfdt: reduce the number of headers included from libfdt_env.h Currently, libfdt_env.h includes just for INT_MAX. pulls in a lots of broat. Thanks to commit 54d50897d544 ("linux/kernel.h: split *_MAX and *_MIN macros into "), can be replaced with . This saves including dozens of headers. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- include/linux/libfdt_env.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index edb0f0c30904..2231eb855e8f 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -2,7 +2,7 @@ #ifndef LIBFDT_ENV_H #define LIBFDT_ENV_H -#include /* For INT_MAX */ +#include /* For INT_MAX */ #include #include -- cgit v1.2.3 From 3820729160440158a014add69cc0d371061a96b2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Oct 2019 17:18:11 -0700 Subject: bpf: Prepare btf_ctx_access for non raw_tp use case This patch makes a few changes to btf_ctx_access() to prepare it for non raw_tp use case where the attach_btf_id is not necessary a BTF_KIND_TYPEDEF. It moves the "btf_trace_" prefix check and typedef-follow logic to a new function "check_attach_btf_id()" which is called only once during bpf_check(). btf_ctx_access() only operates on a BTF_KIND_FUNC_PROTO type now. That should also be more efficient since it is done only one instead of every-time check_ctx_access() is called. "check_attach_btf_id()" needs to find the func_proto type from the attach_btf_id. It needs to store the result into the newly added prog->aux->attach_func_proto. func_proto btf type has no name, so a proper name should be stored into "attach_func_name" also. v2: - Move the "btf_trace_" check to an earlier verifier phase (Alexei) Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191025001811.1718491-1-kafai@fb.com --- include/linux/bpf.h | 5 +++++ include/linux/btf.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2c2c29b49845..171be30fe0ae 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -392,6 +392,11 @@ struct bpf_prog_aux { u32 attach_btf_id; /* in-kernel BTF type id to attach to */ bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; + bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ + const struct btf_type *attach_func_proto; + /* function name for valid attach_btf_id */ + const char *attach_func_name; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct latch_tree_node ksym_tnode; diff --git a/include/linux/btf.h b/include/linux/btf.h index 55d43bc856be..9dee00859c5f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -5,6 +5,7 @@ #define _LINUX_BTF_H 1 #include +#include struct btf; struct btf_member; @@ -53,6 +54,36 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); +static inline bool btf_type_is_ptr(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; +} + +static inline bool btf_type_is_int(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_INT; +} + +static inline bool btf_type_is_enum(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; +} + +static inline bool btf_type_is_typedef(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF; +} + +static inline bool btf_type_is_func(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; +} + +static inline bool btf_type_is_func_proto(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); -- cgit v1.2.3 From a69b0e855d3fd278ff6f09a23e1edf929538e304 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Mon, 21 Oct 2019 19:03:06 +0000 Subject: dma-buf: Add dma-buf heaps framework This framework allows a unified userspace interface for dma-buf exporters, allowing userland to allocate specific types of memory for use in dma-buf sharing. Each heap is given its own device node, which a user can allocate a dma-buf fd from using the DMA_HEAP_IOC_ALLOC. This code is an evoluiton of the Android ION implementation, and a big thanks is due to its authors/maintainers over time for their effort: Rebecca Schultz Zavin, Colin Cross, Benjamin Gaignard, Laura Abbott, and many other contributors! Cc: Laura Abbott Cc: Benjamin Gaignard Cc: Sumit Semwal Cc: Liam Mark Cc: Pratik Patel Cc: Brian Starkey Cc: Vincent Donnefort Cc: Sudipto Paul Cc: Andrew F. Davis Cc: Christoph Hellwig Cc: Chenbo Feng Cc: Alistair Strachan Cc: Hridya Valsaraju Cc: Hillf Danton Cc: dri-devel@lists.freedesktop.org Reviewed-by: Benjamin Gaignard Reviewed-by: Brian Starkey Acked-by: Laura Abbott Tested-by: Ayan Kumar Halder Signed-off-by: Andrew F. Davis Signed-off-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20191021190310.85221-2-john.stultz@linaro.org --- include/linux/dma-heap.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/linux/dma-heap.h (limited to 'include/linux') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h new file mode 100644 index 000000000000..454e354d1ffb --- /dev/null +++ b/include/linux/dma-heap.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DMABUF Heaps Allocation Infrastructure + * + * Copyright (C) 2011 Google, Inc. + * Copyright (C) 2019 Linaro Ltd. + */ + +#ifndef _DMA_HEAPS_H +#define _DMA_HEAPS_H + +#include +#include + +struct dma_heap; + +/** + * struct dma_heap_ops - ops to operate on a given heap + * @allocate: allocate dmabuf and return fd + * + * allocate returns dmabuf fd on success, -errno on error. + */ +struct dma_heap_ops { + int (*allocate)(struct dma_heap *heap, + unsigned long len, + unsigned long fd_flags, + unsigned long heap_flags); +}; + +/** + * struct dma_heap_export_info - information needed to export a new dmabuf heap + * @name: used for debugging/device-node name + * @ops: ops struct for this heap + * @priv: heap exporter private data + * + * Information needed to export a new dmabuf heap. + */ +struct dma_heap_export_info { + const char *name; + const struct dma_heap_ops *ops; + void *priv; +}; + +/** + * dma_heap_get_drvdata() - get per-heap driver data + * @heap: DMA-Heap to retrieve private data for + * + * Returns: + * The per-heap data for the heap. + */ +void *dma_heap_get_drvdata(struct dma_heap *heap); + +/** + * dma_heap_add - adds a heap to dmabuf heaps + * @exp_info: information needed to register this heap + */ +struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); + +#endif /* _DMA_HEAPS_H */ -- cgit v1.2.3 From 5153faac18d293fc7abb19ff7034683fbcd82dc7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Oct 2019 12:03:51 -0700 Subject: cgroup: remove cgroup_enable_task_cg_lists() optimization cgroup_enable_task_cg_lists() is used to lazyily initialize task cgroup associations on the first use to reduce fork / exit overheads on systems which don't use cgroup. Unfortunately, locking around it has never been actually correct and its value is dubious given how the vast majority of systems use cgroup right away from boot. This patch removes the optimization. For now, replace the cg_list based branches with WARN_ON_ONCE()'s to be on the safe side. We can simplify the logic further in the future. Signed-off-by: Tejun Heo Reported-by: Oleg Nesterov Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3ba3e6da13a6..f6b048902d6c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -150,7 +150,6 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp); -void cgroup_enable_task_cg_lists(void); void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, struct css_task_iter *it); struct task_struct *css_task_iter_next(struct css_task_iter *it); -- cgit v1.2.3 From 993e4cdebb5a53bc87f21cdd34d1dc42225de43d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 24 Oct 2019 19:31:10 +0200 Subject: block: reorder bio::__bi_remaining for better packing Simple reordering of __bi_remaining can reduce bio size by 8 bytes that are now wasted on padding (measured on x86_64): struct bio { struct bio * bi_next; /* 0 8 */ struct gendisk * bi_disk; /* 8 8 */ unsigned int bi_opf; /* 16 4 */ short unsigned int bi_flags; /* 20 2 */ short unsigned int bi_ioprio; /* 22 2 */ short unsigned int bi_write_hint; /* 24 2 */ blk_status_t bi_status; /* 26 1 */ u8 bi_partno; /* 27 1 */ /* XXX 4 bytes hole, try to pack */ struct bvec_iter bi_iter; /* 32 24 */ /* XXX last struct has 4 bytes of padding */ atomic_t __bi_remaining; /* 56 4 */ /* XXX 4 bytes hole, try to pack */ [...] /* size: 104, cachelines: 2, members: 19 */ /* sum members: 96, holes: 2, sum holes: 8 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 40 bytes */ }; Now becomes: struct bio { struct bio * bi_next; /* 0 8 */ struct gendisk * bi_disk; /* 8 8 */ unsigned int bi_opf; /* 16 4 */ short unsigned int bi_flags; /* 20 2 */ short unsigned int bi_ioprio; /* 22 2 */ short unsigned int bi_write_hint; /* 24 2 */ blk_status_t bi_status; /* 26 1 */ u8 bi_partno; /* 27 1 */ atomic_t __bi_remaining; /* 28 4 */ struct bvec_iter bi_iter; /* 32 24 */ /* XXX last struct has 4 bytes of padding */ [...] /* size: 96, cachelines: 2, members: 19 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 32 bytes */ }; Signed-off-by: David Sterba Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d688b96d1d63..1e7eeec16458 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -153,10 +153,10 @@ struct bio { unsigned short bi_write_hint; blk_status_t bi_status; u8 bi_partno; + atomic_t __bi_remaining; struct bvec_iter bi_iter; - atomic_t __bi_remaining; bio_end_io_t *bi_end_io; void *bi_private; -- cgit v1.2.3 From d386732bc142c63b9f676fed098bc06f91ee964a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 21 Oct 2019 21:07:24 -0300 Subject: blk-mq: fill header with kernel-doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Insert documentation for structs, enums and functions at header file. Format existing and new comments at struct blk_mq_ops as kernel-doc comments. Reviewed-by: Bart Van Assche Signed-off-by: André Almeida Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 225 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 185 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e0fce93ac127..4919d22e1aff 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -10,74 +10,171 @@ struct blk_mq_tags; struct blk_flush_queue; /** - * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device + * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware + * block device */ struct blk_mq_hw_ctx { struct { + /** @lock: Protects the dispatch list. */ spinlock_t lock; + /** + * @dispatch: Used for requests that are ready to be + * dispatched to the hardware but for some reason (e.g. lack of + * resources) could not be sent to the hardware. As soon as the + * driver can send new requests, requests at this list will + * be sent first for a fairer dispatch. + */ struct list_head dispatch; - unsigned long state; /* BLK_MQ_S_* flags */ + /** + * @state: BLK_MQ_S_* flags. Defines the state of the hw + * queue (active, scheduled to restart, stopped). + */ + unsigned long state; } ____cacheline_aligned_in_smp; + /** + * @run_work: Used for scheduling a hardware queue run at a later time. + */ struct delayed_work run_work; + /** @cpumask: Map of available CPUs where this hctx can run. */ cpumask_var_t cpumask; + /** + * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU + * selection from @cpumask. + */ int next_cpu; + /** + * @next_cpu_batch: Counter of how many works left in the batch before + * changing to the next CPU. + */ int next_cpu_batch; - unsigned long flags; /* BLK_MQ_F_* flags */ + /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ + unsigned long flags; + /** + * @sched_data: Pointer owned by the IO scheduler attached to a request + * queue. It's up to the IO scheduler how to use this pointer. + */ void *sched_data; + /** + * @queue: Pointer to the request queue that owns this hardware context. + */ struct request_queue *queue; + /** @fq: Queue of requests that need to perform a flush operation. */ struct blk_flush_queue *fq; + /** + * @driver_data: Pointer to data owned by the block driver that created + * this hctx + */ void *driver_data; + /** + * @ctx_map: Bitmap for each software queue. If bit is on, there is a + * pending request in that software queue. + */ struct sbitmap ctx_map; + /** + * @dispatch_from: Software queue to be used when no scheduler was + * selected. + */ struct blk_mq_ctx *dispatch_from; + /** + * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to + * decide if the hw_queue is busy using Exponential Weighted Moving + * Average algorithm. + */ unsigned int dispatch_busy; + /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ unsigned short type; + /** @nr_ctx: Number of software queues. */ unsigned short nr_ctx; + /** @ctxs: Array of software queues. */ struct blk_mq_ctx **ctxs; + /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ spinlock_t dispatch_wait_lock; + /** + * @dispatch_wait: Waitqueue to put requests when there is no tag + * available at the moment, to wait for another try in the future. + */ wait_queue_entry_t dispatch_wait; + + /** + * @wait_index: Index of next available dispatch_wait queue to insert + * requests. + */ atomic_t wait_index; + /** + * @tags: Tags owned by the block driver. A tag at this set is only + * assigned when a request is dispatched from a hardware queue. + */ struct blk_mq_tags *tags; + /** + * @sched_tags: Tags owned by I/O scheduler. If there is an I/O + * scheduler associated with a request queue, a tag is assigned when + * that request is allocated. Else, this member is not used. + */ struct blk_mq_tags *sched_tags; + /** @queued: Number of queued requests. */ unsigned long queued; + /** @run: Number of dispatched requests. */ unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 + /** @dispatched: Number of dispatch requests by queue. */ unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; + /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; + /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; + /** + * @nr_active: Number of active requests. Only used when a tag set is + * shared across request queues. + */ atomic_t nr_active; + /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; + /** @kobj: Kernel object for sysfs. */ struct kobject kobj; + /** @poll_considered: Count times blk_poll() was called. */ unsigned long poll_considered; + /** @poll_invoked: Count how many requests blk_poll() polled. */ unsigned long poll_invoked; + /** @poll_success: Count how many polled requests were completed. */ unsigned long poll_success; #ifdef CONFIG_BLK_DEBUG_FS + /** + * @debugfs_dir: debugfs directory for this hardware queue. Named + * as cpu. + */ struct dentry *debugfs_dir; + /** @sched_debugfs_dir: debugfs directory for the scheduler. */ struct dentry *sched_debugfs_dir; #endif + /** @hctx_list: List of all hardware queues. */ struct list_head hctx_list; - /* Must be the last member - see also blk_mq_hw_ctx_size(). */ + /** + * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is + * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also + * blk_mq_hw_ctx_size(). + */ struct srcu_struct srcu[0]; }; /** - * struct blk_mq_queue_map - ctx -> hctx mapping + * struct blk_mq_queue_map - Map software queues to hardware queues * @mq_map: CPU ID to hardware queue index map. This is an array * with nr_cpu_ids elements. Each element has a value in the range * [@queue_offset, @queue_offset + @nr_queues). @@ -92,10 +189,17 @@ struct blk_mq_queue_map { unsigned int queue_offset; }; +/** + * enum hctx_type - Type of hardware queue + * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. + * @HCTX_TYPE_READ: Just for READ I/O. + * @HCTX_TYPE_POLL: Polled I/O of any kind. + * @HCTX_MAX_TYPES: Number of types of hctx. + */ enum hctx_type { - HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */ - HCTX_TYPE_READ, /* just for READ I/O */ - HCTX_TYPE_POLL, /* polled I/O of any kind */ + HCTX_TYPE_DEFAULT, + HCTX_TYPE_READ, + HCTX_TYPE_POLL, HCTX_MAX_TYPES, }; @@ -147,6 +251,12 @@ struct blk_mq_tag_set { struct list_head tag_list; }; +/** + * struct blk_mq_queue_data - Data about a request inserted in a queue + * + * @rq: Request pointer. + * @last: If it is the last request in the queue. + */ struct blk_mq_queue_data { struct request *rq; bool last; @@ -174,81 +284,101 @@ typedef bool (busy_fn)(struct request_queue *); typedef void (complete_fn)(struct request *); typedef void (cleanup_rq_fn)(struct request *); - +/** + * struct blk_mq_ops - Callback functions that implements block driver + * behaviour. + */ struct blk_mq_ops { - /* - * Queue request + /** + * @queue_rq: Queue a new request from block IO. */ queue_rq_fn *queue_rq; - /* - * If a driver uses bd->last to judge when to submit requests to - * hardware, it must define this function. In case of errors that - * make us stop issuing further requests, this hook serves the + /** + * @commit_rqs: If a driver uses bd->last to judge when to submit + * requests to hardware, it must define this function. In case of errors + * that make us stop issuing further requests, this hook serves the * purpose of kicking the hardware (which the last request otherwise * would have done). */ commit_rqs_fn *commit_rqs; - /* - * Reserve budget before queue request, once .queue_rq is + /** + * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ get_budget_fn *get_budget; + /** + * @put_budget: Release the reserved budget. + */ put_budget_fn *put_budget; - /* - * Called on request timeout + /** + * @timeout: Called on request timeout. */ timeout_fn *timeout; - /* - * Called to poll for completion of a specific tag. + /** + * @poll: Called to poll for completion of a specific tag. */ poll_fn *poll; + /** + * @complete: Mark the request as complete. + */ complete_fn *complete; - /* - * Called when the block layer side of a hardware queue has been - * set up, allowing the driver to allocate/init matching structures. - * Ditto for exit/teardown. + /** + * @init_hctx: Called when the block layer side of a hardware queue has + * been set up, allowing the driver to allocate/init matching + * structures. */ init_hctx_fn *init_hctx; + /** + * @exit_hctx: Ditto for exit/teardown. + */ exit_hctx_fn *exit_hctx; - /* - * Called for every command allocated by the block layer to allow - * the driver to set up driver specific data. + /** + * @init_request: Called for every command allocated by the block layer + * to allow the driver to set up driver specific data. * * Tag greater than or equal to queue_depth is for setting up * flush request. - * - * Ditto for exit/teardown. */ init_request_fn *init_request; + /** + * @exit_request: Ditto for exit/teardown. + */ exit_request_fn *exit_request; - /* Called from inside blk_get_request() */ + + /** + * @initialize_rq_fn: Called from inside blk_get_request(). + */ void (*initialize_rq_fn)(struct request *rq); - /* - * Called before freeing one request which isn't completed yet, - * and usually for freeing the driver private data + /** + * @cleanup_rq: Called before freeing one request which isn't completed + * yet, and usually for freeing the driver private data. */ cleanup_rq_fn *cleanup_rq; - /* - * If set, returns whether or not this queue currently is busy + /** + * @busy: If set, returns whether or not this queue currently is busy. */ busy_fn *busy; + /** + * @map_queues: This allows drivers specify their own queue mapping by + * overriding the setup-time function that builds the mq_map. + */ map_queues_fn *map_queues; #ifdef CONFIG_BLK_DEBUG_FS - /* - * Used by the debugfs implementation to show driver-specific + /** + * @show_rq: Used by the debugfs implementation to show driver-specific * information about a request. */ void (*show_rq)(struct seq_file *m, struct request *rq); @@ -391,14 +521,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); -/* +/** + * blk_mq_rq_from_pdu - cast a PDU to a request + * @pdu: the PDU (Protocol Data Unit) to be casted + * + * Return: request + * * Driver command data is immediately after the request. So subtract request - * size to get back to the original request, add request size to get the PDU. + * size to get back to the original request. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { return pdu - sizeof(struct request); } + +/** + * blk_mq_rq_to_pdu - cast a request to a PDU + * @rq: the request to be casted + * + * Return: pointer to the PDU + * + * Driver command data is immediately after the request. So add request to get + * the PDU. + */ static inline void *blk_mq_rq_to_pdu(struct request *rq) { return rq + 1; -- cgit v1.2.3 From 480274787d7e3458bc5a7cfbbbe07033984ad711 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 23 Oct 2019 11:09:26 -0400 Subject: tcp: add TCP_INFO status for failed client TFO The TCPI_OPT_SYN_DATA bit as part of tcpi_options currently reports whether or not data-in-SYN was ack'd on both the client and server side. We'd like to gather more information on the client-side in the failure case in order to indicate the reason for the failure. This can be useful for not only debugging TFO, but also for creating TFO socket policies. For example, if a middle box removes the TFO option or drops a data-in-SYN, we can can detect this case, and turn off TFO for these connections saving the extra retransmits. The newly added tcpi_fastopen_client_fail status is 2 bits and has the following 4 states: 1) TFO_STATUS_UNSPEC Catch-all state which includes when TFO is disabled via black hole detection, which is indicated via LINUX_MIB_TCPFASTOPENBLACKHOLE. 2) TFO_COOKIE_UNAVAILABLE If TFO_CLIENT_NO_COOKIE mode is off, this state indicates that no cookie is available in the cache. 3) TFO_DATA_NOT_ACKED Data was sent with SYN, we received a SYN/ACK but it did not cover the data portion. Cookie is not accepted by server because the cookie may be invalid or the server may be overloaded. 4) TFO_SYN_RETRANSMITTED Data was sent with SYN, we received a SYN/ACK which did not cover the data after at least 1 additional SYN was sent (without data). It may be the case that a middle-box is dropping data-in-SYN packets. Thus, it would be more efficient to not use TFO on this connection to avoid extra retransmits during connection establishment. These new fields do not cover all the cases where TFO may fail, but other failures, such as SYN/ACK + data being dropped, will result in the connection not becoming established. And a connection blackhole after session establishment shows up as a stalled connection. Signed-off-by: Jason Baron Cc: Eric Dumazet Cc: Neal Cardwell Cc: Christoph Paasch Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 668e25a76d69..ca6f01531e64 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -223,7 +223,7 @@ struct tcp_sock { fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ - unused:2; + fastopen_client_fail:2; /* reason why fastopen failed */ u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ -- cgit v1.2.3 From caccddcfc4b4de75930df2e8f7fd0c66556b13ff Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 18 Jun 2018 14:01:51 +0200 Subject: gpu: host1x: Request channels for clients, not devices A struct device doesn't carry much information that a channel might be interested in, but the client very much does. Request channels for the clients rather than their parent devices and store a pointer to them in order to have that information available when needed. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index e6eea45e1154..4396cd566a33 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -158,7 +158,7 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); struct host1x_channel; struct host1x_job; -struct host1x_channel *host1x_channel_request(struct device *dev); +struct host1x_channel *host1x_channel_request(struct host1x_client *client); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int host1x_job_submit(struct host1x_job *job); -- cgit v1.2.3 From aacdf19849734d1be5e407932228ae101ba5b92f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 8 Feb 2019 14:35:13 +0100 Subject: drm/tegra: Move IOMMU group into host1x client Handling of the IOMMU group attachment is common to all clients, so move the group into the client to simplify code. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 4396cd566a33..df6e613ba715 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -18,6 +18,7 @@ enum host1x_class { }; struct host1x_client; +struct iommu_group; /** * struct host1x_client_ops - host1x client operations @@ -34,6 +35,7 @@ struct host1x_client_ops { * @list: list node for the host1x client * @parent: pointer to struct device representing the host1x controller * @dev: pointer to struct device backing this host1x client + * @group: IOMMU group that this client is a member of * @ops: host1x client operations * @class: host1x class represented by this client * @channel: host1x channel associated with this client @@ -44,6 +46,7 @@ struct host1x_client { struct list_head list; struct device *parent; struct device *dev; + struct iommu_group *group; const struct host1x_client_ops *ops; -- cgit v1.2.3 From 69393cb03ccdf29f3b452d3482ef918469d1c098 Mon Sep 17 00:00:00 2001 From: "Christopher M. Riedl" Date: Sat, 7 Sep 2019 01:11:24 -0500 Subject: powerpc/xmon: Restrict when kernel is locked down Xmon should be either fully or partially disabled depending on the kernel lockdown state. Put xmon into read-only mode for lockdown=integrity and prevent user entry into xmon when lockdown=confidentiality. Xmon checks the lockdown state on every attempted entry: (1) during early xmon'ing (2) when triggered via sysrq (3) when toggled via debugfs (4) when triggered via a previously enabled breakpoint The following lockdown state transitions are handled: (1) lockdown=none -> lockdown=integrity set xmon read-only mode (2) lockdown=none -> lockdown=confidentiality clear all breakpoints, set xmon read-only mode, prevent user re-entry into xmon (3) lockdown=integrity -> lockdown=confidentiality clear all breakpoints, set xmon read-only mode, prevent user re-entry into xmon Suggested-by: Andrew Donnellan Signed-off-by: Christopher M. Riedl Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190907061124.1947-3-cmr@informatik.wtf --- include/linux/security.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index a8d59d612d27..79567eacb834 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -116,12 +116,14 @@ enum lockdown_reason { LOCKDOWN_MODULE_PARAMETERS, LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, + LOCKDOWN_XMON_WR, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, LOCKDOWN_BPF_READ, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, + LOCKDOWN_XMON_RW, LOCKDOWN_CONFIDENTIALITY_MAX, }; -- cgit v1.2.3 From fc1adfe306b71e094df636012f8c0fed971cad45 Mon Sep 17 00:00:00 2001 From: Alexey Budankov Date: Wed, 23 Oct 2019 10:11:04 +0300 Subject: perf/core, perf/x86: Introduce swap_task_ctx() method at 'struct pmu' Declare swap_task_ctx() methods at the generic and x86 specific pmu types to bridge calls to platform specific PMU code on optimized context switch path between equivalent task perf event contexts. Signed-off-by: Alexey Budankov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: https://lkml.kernel.org/r/9a0aa84a-f062-9b64-3133-373658550c4b@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4f77b22d47be..011dcbdbccc2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -410,6 +410,15 @@ struct pmu { */ size_t task_ctx_size; + /* + * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) + * can be synchronized using this function. See Intel LBR callstack support + * implementation and Perf core context switch handling callbacks for usage + * examples. + */ + void (*swap_task_ctx) (struct perf_event_context *prev, + struct perf_event_context *next); + /* optional */ /* * Set up pmu-private data structures for an AUX area -- cgit v1.2.3 From 837a6e7f5cdb5e411c6187729e12962c2705160d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Oct 2019 22:26:02 -0700 Subject: fs: add generic UNRESVSP and ZERO_RANGE ioctl handlers These use the same scheme as the pre-existing mapping of the XFS RESVP ioctls to ->falloc, so just extend it and remove the XFS implementation. Signed-off-by: Christoph Hellwig [darrick: fix compile error on s390] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/falloc.h | 3 +++ include/linux/fs.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/falloc.h b/include/linux/falloc.h index 674d59f4d6ce..f5c73f0ec22d 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -20,7 +20,10 @@ struct space_resv { }; #define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) +#define FS_IOC_UNRESVSP _IOW('X', 41, struct space_resv) #define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) +#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv) +#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv) #define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ FALLOC_FL_PUNCH_HOLE | \ diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..2b5692207c1d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2547,7 +2547,7 @@ extern int finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -extern int ioctl_preallocate(struct file *filp, void __user *argp); +extern int ioctl_preallocate(struct file *filp, int mode, void __user *argp); /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); -- cgit v1.2.3 From c3a6cf19e695c8b0a9bf8b5933f863e12d878b7c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 18 Oct 2019 10:31:43 +0100 Subject: export: avoid code duplication in include/linux/export.h include/linux/export.h has lots of code duplication between EXPORT_SYMBOL and EXPORT_SYMBOL_NS. To improve the maintainability and readability, unify the implementation. When the symbol has no namespace, pass the empty string "" to the 'ns' parameter. The drawback of this change is, it grows the code size. When the symbol has no namespace, sym->namespace was previously NULL, but it is now an empty string "". So, it increases 1 byte for every no namespace EXPORT_SYMBOL. A typical kernel configuration has 10K exported symbols, so it increases 10KB in rough estimation. I did not come up with a good idea to refactor it without increasing the code size. I am not sure how big a deal it is, but at least include/linux/export.h looks nicer. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Masahiro Yamada [maennich: rebase on top of 3 fixes for the namespace feature] Signed-off-by: Matthias Maennich Signed-off-by: Jessica Yu --- include/linux/export.h | 91 ++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 941d075f03d6..201262793369 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -46,7 +46,7 @@ extern struct module __this_module; * absolute relocations that require runtime processing on relocatable * kernels. */ -#define __KSYMTAB_ENTRY_NS(sym, sec) \ +#define __KSYMTAB_ENTRY(sym, sec) \ __ADDRESSABLE(sym) \ asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \ " .balign 4 \n" \ @@ -56,33 +56,17 @@ extern struct module __this_module; " .long __kstrtabns_" #sym "- . \n" \ " .previous \n") -#define __KSYMTAB_ENTRY(sym, sec) \ - __ADDRESSABLE(sym) \ - asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \ - " .balign 4 \n" \ - "__ksymtab_" #sym ": \n" \ - " .long " #sym "- . \n" \ - " .long __kstrtab_" #sym "- . \n" \ - " .long 0 \n" \ - " .previous \n") - struct kernel_symbol { int value_offset; int name_offset; int namespace_offset; }; #else -#define __KSYMTAB_ENTRY_NS(sym, sec) \ - static const struct kernel_symbol __ksymtab_##sym \ - __attribute__((section("___ksymtab" sec "+" #sym), used)) \ - __aligned(sizeof(void *)) \ - = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym } - #define __KSYMTAB_ENTRY(sym, sec) \ static const struct kernel_symbol __ksymtab_##sym \ __attribute__((section("___ksymtab" sec "+" #sym), used)) \ __aligned(sizeof(void *)) \ - = { (unsigned long)&sym, __kstrtab_##sym, NULL } + = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym } struct kernel_symbol { unsigned long value; @@ -93,28 +77,20 @@ struct kernel_symbol { #ifdef __GENKSYMS__ -#define ___EXPORT_SYMBOL(sym,sec) __GENKSYMS_EXPORT_SYMBOL(sym) -#define ___EXPORT_SYMBOL_NS(sym,sec,ns) __GENKSYMS_EXPORT_SYMBOL(sym) +#define ___EXPORT_SYMBOL(sym, sec, ns) __GENKSYMS_EXPORT_SYMBOL(sym) #else -#define ___export_symbol_common(sym, sec) \ +/* For every exported symbol, place a struct in the __ksymtab section */ +#define ___EXPORT_SYMBOL(sym, sec, ns) \ extern typeof(sym) sym; \ __CRC_SYMBOL(sym, sec); \ static const char __kstrtab_##sym[] \ __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ - = #sym \ - -/* For every exported symbol, place a struct in the __ksymtab section */ -#define ___EXPORT_SYMBOL_NS(sym, sec, ns) \ - ___export_symbol_common(sym, sec); \ + = #sym; \ static const char __kstrtabns_##sym[] \ __attribute__((section("__ksymtab_strings"), used, aligned(1))) \ - = #ns; \ - __KSYMTAB_ENTRY_NS(sym, sec) - -#define ___EXPORT_SYMBOL(sym, sec) \ - ___export_symbol_common(sym, sec); \ + = ns; \ __KSYMTAB_ENTRY(sym, sec) #endif @@ -126,8 +102,7 @@ struct kernel_symbol { * be reused in other execution contexts such as the UEFI stub or the * decompressor. */ -#define __EXPORT_SYMBOL_NS(sym, sec, ns) -#define __EXPORT_SYMBOL(sym, sec) +#define __EXPORT_SYMBOL(sym, sec, ns) #elif defined(CONFIG_TRIM_UNUSED_KSYMS) @@ -143,48 +118,38 @@ struct kernel_symbol { #define __ksym_marker(sym) \ static int __ksym_marker_##sym[0] __section(".discard.ksym") __used -#define __EXPORT_SYMBOL(sym, sec) \ - __ksym_marker(sym); \ - __cond_export_sym(sym, sec, __is_defined(__KSYM_##sym)) -#define __cond_export_sym(sym, sec, conf) \ - ___cond_export_sym(sym, sec, conf) -#define ___cond_export_sym(sym, sec, enabled) \ - __cond_export_sym_##enabled(sym, sec) -#define __cond_export_sym_1(sym, sec) ___EXPORT_SYMBOL(sym, sec) -#define __cond_export_sym_0(sym, sec) /* nothing */ - -#define __EXPORT_SYMBOL_NS(sym, sec, ns) \ +#define __EXPORT_SYMBOL(sym, sec, ns) \ __ksym_marker(sym); \ - __cond_export_ns_sym(sym, sec, ns, __is_defined(__KSYM_##sym)) -#define __cond_export_ns_sym(sym, sec, ns, conf) \ - ___cond_export_ns_sym(sym, sec, ns, conf) -#define ___cond_export_ns_sym(sym, sec, ns, enabled) \ - __cond_export_ns_sym_##enabled(sym, sec, ns) -#define __cond_export_ns_sym_1(sym, sec, ns) ___EXPORT_SYMBOL_NS(sym, sec, ns) -#define __cond_export_ns_sym_0(sym, sec, ns) /* nothing */ + __cond_export_sym(sym, sec, ns, __is_defined(__KSYM_##sym)) +#define __cond_export_sym(sym, sec, ns, conf) \ + ___cond_export_sym(sym, sec, ns, conf) +#define ___cond_export_sym(sym, sec, ns, enabled) \ + __cond_export_sym_##enabled(sym, sec, ns) +#define __cond_export_sym_1(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns) +#define __cond_export_sym_0(sym, sec, ns) /* nothing */ #else -#define __EXPORT_SYMBOL_NS(sym,sec,ns) ___EXPORT_SYMBOL_NS(sym,sec,ns) -#define __EXPORT_SYMBOL(sym,sec) ___EXPORT_SYMBOL(sym,sec) +#define __EXPORT_SYMBOL(sym, sec, ns) ___EXPORT_SYMBOL(sym, sec, ns) #endif /* CONFIG_MODULES */ #ifdef DEFAULT_SYMBOL_NAMESPACE -#undef __EXPORT_SYMBOL -#define __EXPORT_SYMBOL(sym, sec) \ - __EXPORT_SYMBOL_NS(sym, sec, DEFAULT_SYMBOL_NAMESPACE) +#include +#define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, __stringify(DEFAULT_SYMBOL_NAMESPACE)) +#else +#define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "") #endif -#define EXPORT_SYMBOL(sym) __EXPORT_SYMBOL(sym, "") -#define EXPORT_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_gpl") -#define EXPORT_SYMBOL_GPL_FUTURE(sym) __EXPORT_SYMBOL(sym, "_gpl_future") -#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL_NS(sym, "", ns) -#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL_NS(sym, "_gpl", ns) +#define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") +#define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_gpl") +#define EXPORT_SYMBOL_GPL_FUTURE(sym) _EXPORT_SYMBOL(sym, "_gpl_future") +#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) +#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) #ifdef CONFIG_UNUSED_SYMBOLS -#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused") -#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl") +#define EXPORT_UNUSED_SYMBOL(sym) _EXPORT_SYMBOL(sym, "_unused") +#define EXPORT_UNUSED_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_unused_gpl") #else #define EXPORT_UNUSED_SYMBOL(sym) #define EXPORT_UNUSED_SYMBOL_GPL(sym) -- cgit v1.2.3 From 8e01d9a396e6db153d94a6004e6473d9ff251a6a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Oct 2019 14:41:59 +0000 Subject: KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put When the VHE code was reworked, a lot of the vgic stuff was moved around, but the GICv4 residency code did stay untouched, meaning that we come in and out of residency on each flush/sync, which is obviously suboptimal. To address this, let's move things around a bit: - Residency entry (flush) moves to vcpu_load - Residency exit (sync) moves to vcpu_put - On blocking (entry to WFI), we "put" - On unblocking (exit from WFI), we "load" Because these can nest (load/block/put/load/unblock/put, for example), we now have per-VPE tracking of the residency state. Additionally, vgic_v4_put gains a "need doorbell" parameter, which only gets set to true when blocking because of a WFI. This allows a finer control of the doorbell, which now also gets disabled as soon as it gets signaled. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20191027144234.8395-2-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index e6b155713b47..ab1396afe08a 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -35,6 +35,8 @@ struct its_vpe { /* Doorbell interrupt */ int irq; irq_hw_number_t vpe_db_lpi; + /* VPE resident */ + bool resident; /* VPE proxy mapping */ int vpe_proxy_event; /* -- cgit v1.2.3 From 74bddb3682f60df16ba24be335c94de348ba1b07 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:24 -0300 Subject: RDMA/mlx5: Delete struct mlx5_priv->mkey_table No users are left, delete it. Link: https://lore.kernel.org/r/20191009160934.3143-5-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3e80f03a387f..8288b62b8f37 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -556,8 +556,6 @@ struct mlx5_priv { struct dentry *cmdif_debugfs; /* end: qp staff */ - struct xarray mkey_table; - /* start: alloc staff */ /* protect buffer alocation according to numa node */ struct mutex alloc_mutex; @@ -942,8 +940,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -void mlx5_init_mkey_table(struct mlx5_core_dev *dev); -void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, struct mlx5_async_ctx *async_ctx, u32 *in, -- cgit v1.2.3 From 802f4a827f139f2581b3c50c69d20f8bf4c24af1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:47 +0200 Subject: sched/vtime: Record CPU under seqcount for kcpustat needs In order to compute the kcpustat delta on a nohz CPU, we'll need to fetch the task running on that target. Checking that its vtime state snapshot actually refers to the relevant target involves recording that CPU under the seqcount locked on task switch. This is a step toward making kcpustat moving forward on full nohz CPUs. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-2-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c2e56bd8913..d5d07335a97b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -259,6 +259,7 @@ struct vtime { seqcount_t seqcount; unsigned long long starttime; enum vtime_state state; + unsigned int cpu; u64 utime; u64 stime; u64 gtime; -- cgit v1.2.3 From 14faf6fcac4ba33f8fd8d9b2d0278010a9eb1742 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:48 +0200 Subject: sched/cputime: Add vtime idle task state Record idle as a VTIME state instead of guessing it from VTIME_SYS and is_idle_task(). This is going to simplify the cputime read side especially as its state machine is going to further expand in order to fully support kcpustat on nohz_full. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-3-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d5d07335a97b..4ae19be2c126 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -249,10 +249,12 @@ struct prev_cputime { enum vtime_state { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active: */ - VTIME_USER, + /* Task is idle */ + VTIME_IDLE, /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, + /* Task runs in userspace in a CPU with VTIME active: */ + VTIME_USER, }; struct vtime { -- cgit v1.2.3 From e6d5bf3e321ca664d12eb00ceb40bd58987ce8a1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:49 +0200 Subject: sched/cputime: Add vtime guest task state Record guest as a VTIME state instead of guessing it from VTIME_SYS and PF_VCPU. This is going to simplify the cputime read side especially as its state machine is going to further expand in order to fully support kcpustat on nohz_full. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-4-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ae19be2c126..988c4da00c31 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -255,6 +255,8 @@ enum vtime_state { VTIME_SYS, /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, + /* Task runs as guests in a CPU with VTIME active: */ + VTIME_GUEST, }; struct vtime { -- cgit v1.2.3 From 0ca167c056ea3d637b7959e6e94010fa526b9b8c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:50 +0200 Subject: context_tracking: Remove context_tracking_active() This function is a leftover from old removal or rename. We can drop it. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-5-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking_state.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index f128dc3be0df..f4633c2c29a5 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -42,7 +42,6 @@ static inline bool context_tracking_in_user(void) } #else static inline bool context_tracking_in_user(void) { return false; } -static inline bool context_tracking_active(void) { return false; } static inline bool context_tracking_is_enabled(void) { return false; } static inline bool context_tracking_cpu_is_enabled(void) { return false; } #endif /* CONFIG_CONTEXT_TRACKING */ -- cgit v1.2.3 From 74c578759f15cb5a0d0107759bdad671d7b52ab9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:51 +0200 Subject: context_tracking: Rename context_tracking_is_enabled() => context_tracking_enabled() Remove the superfluous "is" in the middle of the name. We want to standardize the naming so that it can be expanded through suffixes: context_tracking_enabled() context_tracking_enabled_cpu() context_tracking_enabled_this_cpu() Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-6-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking.h | 20 ++++++++++---------- include/linux/context_tracking_state.h | 8 ++++---- include/linux/tick.h | 2 +- include/linux/vtime.h | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 558a209c247d..f1601bac08dc 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -22,26 +22,26 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { - if (context_tracking_is_enabled()) + if (context_tracking_enabled()) context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { - if (context_tracking_is_enabled()) + if (context_tracking_enabled()) context_tracking_exit(CONTEXT_USER); } /* Called with interrupts disabled. */ static inline void user_enter_irqoff(void) { - if (context_tracking_is_enabled()) + if (context_tracking_enabled()) __context_tracking_enter(CONTEXT_USER); } static inline void user_exit_irqoff(void) { - if (context_tracking_is_enabled()) + if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_USER); } @@ -49,7 +49,7 @@ static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; - if (!context_tracking_is_enabled()) + if (!context_tracking_enabled()) return 0; prev_ctx = this_cpu_read(context_tracking.state); @@ -61,7 +61,7 @@ static inline enum ctx_state exception_enter(void) static inline void exception_exit(enum ctx_state prev_ctx) { - if (context_tracking_is_enabled()) { + if (context_tracking_enabled()) { if (prev_ctx != CONTEXT_KERNEL) context_tracking_enter(prev_ctx); } @@ -77,7 +77,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) */ static inline enum ctx_state ct_state(void) { - return context_tracking_is_enabled() ? + return context_tracking_enabled() ? this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; } #else @@ -90,7 +90,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } #endif /* !CONFIG_CONTEXT_TRACKING */ -#define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond)) +#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) #ifdef CONFIG_CONTEXT_TRACKING_FORCE extern void context_tracking_init(void); @@ -108,7 +108,7 @@ static inline void guest_enter_irqoff(void) else current->flags |= PF_VCPU; - if (context_tracking_is_enabled()) + if (context_tracking_enabled()) __context_tracking_enter(CONTEXT_GUEST); /* KVM does not hold any references to rcu protected data when it @@ -124,7 +124,7 @@ static inline void guest_enter_irqoff(void) static inline void guest_exit_irqoff(void) { - if (context_tracking_is_enabled()) + if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_cpu_enabled()) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index f4633c2c29a5..91250bdf2060 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -23,12 +23,12 @@ struct context_tracking { }; #ifdef CONFIG_CONTEXT_TRACKING -extern struct static_key_false context_tracking_enabled; +extern struct static_key_false context_tracking_key; DECLARE_PER_CPU(struct context_tracking, context_tracking); -static inline bool context_tracking_is_enabled(void) +static inline bool context_tracking_enabled(void) { - return static_branch_unlikely(&context_tracking_enabled); + return static_branch_unlikely(&context_tracking_key); } static inline bool context_tracking_cpu_is_enabled(void) @@ -42,7 +42,7 @@ static inline bool context_tracking_in_user(void) } #else static inline bool context_tracking_in_user(void) { return false; } -static inline bool context_tracking_is_enabled(void) { return false; } +static inline bool context_tracking_enabled(void) { return false; } static inline bool context_tracking_cpu_is_enabled(void) { return false; } #endif /* CONFIG_CONTEXT_TRACKING */ diff --git a/include/linux/tick.h b/include/linux/tick.h index f92a10b5e112..7e050a356cc5 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -174,7 +174,7 @@ extern cpumask_var_t tick_nohz_full_mask; static inline bool tick_nohz_full_enabled(void) { - if (!context_tracking_is_enabled()) + if (!context_tracking_enabled()) return false; return tick_nohz_full_running; diff --git a/include/linux/vtime.h b/include/linux/vtime.h index d9160ab3667a..0fc7f11f7aa4 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -28,7 +28,7 @@ extern void vtime_task_switch(struct task_struct *prev); */ static inline bool vtime_accounting_enabled(void) { - return context_tracking_is_enabled(); + return context_tracking_enabled(); } static inline bool vtime_accounting_cpu_enabled(void) -- cgit v1.2.3 From 84e0dacd0c347e9ee2531052013babd84683245f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:52 +0200 Subject: context_tracking: Rename context_tracking_is_cpu_enabled() to context_tracking_enabled_this_cpu() Standardize the naming on top of the context_tracking_enabled_*() base. Also make it clear we are checking the context tracking state of the *current* CPU with this function. We'll need to add an API to check that state on remote CPUs as well, so we must disambiguate the naming. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-7-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking.h | 2 +- include/linux/context_tracking_state.h | 4 ++-- include/linux/vtime.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index f1601bac08dc..c9065ad518a7 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -118,7 +118,7 @@ static inline void guest_enter_irqoff(void) * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. */ - if (!context_tracking_cpu_is_enabled()) + if (!context_tracking_enabled_this_cpu()) rcu_virt_note_context_switch(smp_processor_id()); } diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 91250bdf2060..08f125f6b31b 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -31,7 +31,7 @@ static inline bool context_tracking_enabled(void) return static_branch_unlikely(&context_tracking_key); } -static inline bool context_tracking_cpu_is_enabled(void) +static inline bool context_tracking_enabled_this_cpu(void) { return __this_cpu_read(context_tracking.active); } @@ -43,7 +43,7 @@ static inline bool context_tracking_in_user(void) #else static inline bool context_tracking_in_user(void) { return false; } static inline bool context_tracking_enabled(void) { return false; } -static inline bool context_tracking_cpu_is_enabled(void) { return false; } +static inline bool context_tracking_enabled_this_cpu(void) { return false; } #endif /* CONFIG_CONTEXT_TRACKING */ #endif diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 0fc7f11f7aa4..54e91511250b 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -34,7 +34,7 @@ static inline bool vtime_accounting_enabled(void) static inline bool vtime_accounting_cpu_enabled(void) { if (vtime_accounting_enabled()) { - if (context_tracking_cpu_is_enabled()) + if (context_tracking_enabled_this_cpu()) return true; } -- cgit v1.2.3 From 097f2541c6e51e0c1cdb1e6d46ef08a624336518 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:53 +0200 Subject: context_tracking: Introduce context_tracking_enabled_cpu() This allows us to check if a remote CPU runs context tracking (ie: is nohz_full). We'll need that to reliably support "nice" accounting on kcpustat. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-8-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking_state.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 08f125f6b31b..587717705727 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -31,6 +31,11 @@ static inline bool context_tracking_enabled(void) return static_branch_unlikely(&context_tracking_key); } +static inline bool context_tracking_enabled_cpu(int cpu) +{ + return per_cpu(context_tracking.active, cpu); +} + static inline bool context_tracking_enabled_this_cpu(void) { return __this_cpu_read(context_tracking.active); @@ -43,6 +48,7 @@ static inline bool context_tracking_in_user(void) #else static inline bool context_tracking_in_user(void) { return false; } static inline bool context_tracking_enabled(void) { return false; } +static inline bool context_tracking_enabled_cpu(int cpu) { return false; } static inline bool context_tracking_enabled_this_cpu(void) { return false; } #endif /* CONFIG_CONTEXT_TRACKING */ -- cgit v1.2.3 From e44fcb4b7a299602fb300b82a546c0b8a50d9d90 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:54 +0200 Subject: sched/vtime: Rename vtime_accounting_cpu_enabled() to vtime_accounting_enabled_this_cpu() Standardize the naming on top of the vtime_accounting_enabled_*() base. Also make it clear we are checking the vtime state of the *current* CPU with this function. We'll need to add an API to check that state on remote CPUs as well, so we must disambiguate the naming. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-9-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking.h | 4 ++-- include/linux/vtime.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index c9065ad518a7..64ec82851aa3 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -103,7 +103,7 @@ static inline void context_tracking_init(void) { } /* must be called with irqs disabled */ static inline void guest_enter_irqoff(void) { - if (vtime_accounting_cpu_enabled()) + if (vtime_accounting_enabled_this_cpu()) vtime_guest_enter(current); else current->flags |= PF_VCPU; @@ -127,7 +127,7 @@ static inline void guest_exit_irqoff(void) if (context_tracking_enabled()) __context_tracking_exit(CONTEXT_GUEST); - if (vtime_accounting_cpu_enabled()) + if (vtime_accounting_enabled_this_cpu()) vtime_guest_exit(current); else current->flags &= ~PF_VCPU; diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 54e91511250b..eb2e7a19054b 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -11,11 +11,11 @@ struct task_struct; /* - * vtime_accounting_cpu_enabled() definitions/declarations + * vtime_accounting_enabled_this_cpu() definitions/declarations */ #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) -static inline bool vtime_accounting_cpu_enabled(void) { return true; } +static inline bool vtime_accounting_enabled_this_cpu(void) { return true; } extern void vtime_task_switch(struct task_struct *prev); #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN) @@ -31,7 +31,7 @@ static inline bool vtime_accounting_enabled(void) return context_tracking_enabled(); } -static inline bool vtime_accounting_cpu_enabled(void) +static inline bool vtime_accounting_enabled_this_cpu(void) { if (vtime_accounting_enabled()) { if (context_tracking_enabled_this_cpu()) @@ -45,13 +45,13 @@ extern void vtime_task_switch_generic(struct task_struct *prev); static inline void vtime_task_switch(struct task_struct *prev) { - if (vtime_accounting_cpu_enabled()) + if (vtime_accounting_enabled_this_cpu()) vtime_task_switch_generic(prev); } #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ -static inline bool vtime_accounting_cpu_enabled(void) { return false; } +static inline bool vtime_accounting_enabled_this_cpu(void) { return false; } static inline void vtime_task_switch(struct task_struct *prev) { } #endif -- cgit v1.2.3 From 9adbb9dd4c4eb45e1129fc73d8de69ca72350f81 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:55 +0200 Subject: sched/vtime: Introduce vtime_accounting_enabled_cpu() This allows us to check if a remote CPU runs vtime accounting (ie: is nohz_full). We'll need that to reliably support reading kcpustat on nohz_full CPUs. Also simplify a bit the condition in the local flavoured function while at it. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-10-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/vtime.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index eb2e7a19054b..e2733bf33541 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -31,14 +31,14 @@ static inline bool vtime_accounting_enabled(void) return context_tracking_enabled(); } -static inline bool vtime_accounting_enabled_this_cpu(void) +static inline bool vtime_accounting_enabled_cpu(int cpu) { - if (vtime_accounting_enabled()) { - if (context_tracking_enabled_this_cpu()) - return true; - } + return (vtime_accounting_enabled() && context_tracking_enabled_cpu(cpu)); +} - return false; +static inline bool vtime_accounting_enabled_this_cpu(void) +{ + return (vtime_accounting_enabled() && context_tracking_enabled_this_cpu()); } extern void vtime_task_switch_generic(struct task_struct *prev); @@ -51,6 +51,7 @@ static inline void vtime_task_switch(struct task_struct *prev) #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +static inline bool vtime_accounting_enabled_cpu(int cpu) {return false; } static inline bool vtime_accounting_enabled_this_cpu(void) { return false; } static inline void vtime_task_switch(struct task_struct *prev) { } -- cgit v1.2.3 From 023e9deb51c9e1aafacbd421e55beadcb8e87f53 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Oct 2019 04:56:56 +0200 Subject: context_tracking: Check static key on context_tracking_enabled_*cpu() guest_enter() doesn't call context_tracking_enabled() before calling context_tracking_enabled_this_cpu(). Therefore the guest code doesn't benefit from the static key on the fast path. Just make sure that context_tracking_enabled_*cpu() functions check the static key by themselves to propagate the optimization. Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Jacek Anaszewski Cc: Linus Torvalds Cc: Pavel Machek Cc: Rafael J . Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191016025700.31277-11-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking_state.h | 4 ++-- include/linux/vtime.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 587717705727..e7fe6678b7ad 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -33,12 +33,12 @@ static inline bool context_tracking_enabled(void) static inline bool context_tracking_enabled_cpu(int cpu) { - return per_cpu(context_tracking.active, cpu); + return context_tracking_enabled() && per_cpu(context_tracking.active, cpu); } static inline bool context_tracking_enabled_this_cpu(void) { - return __this_cpu_read(context_tracking.active); + return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } static inline bool context_tracking_in_user(void) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index e2733bf33541..2cdeca062db3 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -33,12 +33,12 @@ static inline bool vtime_accounting_enabled(void) static inline bool vtime_accounting_enabled_cpu(int cpu) { - return (vtime_accounting_enabled() && context_tracking_enabled_cpu(cpu)); + return context_tracking_enabled_cpu(cpu); } static inline bool vtime_accounting_enabled_this_cpu(void) { - return (vtime_accounting_enabled() && context_tracking_enabled_this_cpu()); + return context_tracking_enabled_this_cpu(); } extern void vtime_task_switch_generic(struct task_struct *prev); -- cgit v1.2.3 From 64eea63c19a2c386a96638f4e54a1355510709e3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 25 Oct 2019 04:03:03 +0200 Subject: sched/kcpustat: Introduce vtime-aware kcpustat accessor for CPUTIME_SYSTEM Kcpustat is not correctly supported on nohz_full CPUs. The tick doesn't fire and the cputime therefore doesn't move forward. The issue has shown up after the vanishing of the remaining 1Hz which has made the stall visible. We are solving that with checking the task running on a CPU through RCU and reading its vtime delta that we add to the raw kcpustat values. We make sure that we fetch a coherent raw-kcpustat/vtime-delta couple sequence while checking that the CPU referred by the target vtime is the correct one, under the locked vtime seqcount. Only CPUTIME_SYSTEM is handled here as a start because it's the trivial case. User and guest time will require more preparation work to correctly handle niceness. Reported-by: Yauheni Kaliuta Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Link: https://lkml.kernel.org/r/20191025020303.19342-1-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 7ee2bb43b251..79781196eb25 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -78,6 +78,17 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) return kstat_cpu(cpu).irqs_sum; } +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern u64 kcpustat_field(struct kernel_cpustat *kcpustat, + enum cpu_usage_stat usage, int cpu); +#else +static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat, + enum cpu_usage_stat usage, int cpu) +{ + return kcpustat->cpustat[usage]; +} +#endif + extern void account_user_time(struct task_struct *, u64); extern void account_guest_time(struct task_struct *, u64); extern void account_system_time(struct task_struct *, int, u64); -- cgit v1.2.3 From 494f8b10d832456a96be4ee7317425f6936cabc8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 28 Oct 2019 11:32:32 -0500 Subject: resource: Add a resource_list_first_type helper A common pattern is looping over a resource_list just to get a matching entry with a specific type. Add resource_list_first_type() helper which implements this. Signed-off-by: Rob Herring Signed-off-by: Lorenzo Pieralisi --- include/linux/resource_ext.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/resource_ext.h b/include/linux/resource_ext.h index 06da59b23b79..ff0339df56af 100644 --- a/include/linux/resource_ext.h +++ b/include/linux/resource_ext.h @@ -66,4 +66,16 @@ resource_list_destroy_entry(struct resource_entry *entry) #define resource_list_for_each_entry_safe(entry, tmp, list) \ list_for_each_entry_safe((entry), (tmp), (list), node) +static inline struct resource_entry * +resource_list_first_type(struct list_head *list, unsigned long type) +{ + struct resource_entry *entry; + + resource_list_for_each_entry(entry, list) { + if (resource_type(entry->res) == type) + return entry; + } + return NULL; +} + #endif /* _LINUX_RESOURCE_EXT_H */ -- cgit v1.2.3 From f7907e57aea2adcd0b57ebcca410e125412ab680 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Tue, 29 Oct 2019 01:21:31 +0000 Subject: regulator: fixed: add off-on-delay Depends on board design, the gpio controlling regulator may connects with a big capacitance. When need off, it takes some time to let the regulator to be truly off. If not add enough delay, the regulator might have always been on, so introduce off-on-delay to handle such case. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/1572311875-22880-3-git-send-email-peng.fan@nxp.com Signed-off-by: Mark Brown --- include/linux/regulator/fixed.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h index d44ce5f18a56..55319943fcc5 100644 --- a/include/linux/regulator/fixed.h +++ b/include/linux/regulator/fixed.h @@ -36,6 +36,7 @@ struct fixed_voltage_config { const char *input_supply; int microvolts; unsigned startup_delay; + unsigned int off_on_delay; unsigned enabled_at_boot:1; struct regulator_init_data *init_data; }; -- cgit v1.2.3 From 80327ce3d4edaa9abde1c6e1a1785572c7de3750 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 28 Oct 2019 13:37:09 +0100 Subject: gpu: host1x: Overhaul host1x_bo_{pin,unpin}() API The host1x_bo_pin() and host1x_bo_unpin() APIs are used to pin and unpin buffers during host1x job submission. Pinning currently returns the SG table and the DMA address (an IOVA if an IOMMU is used or a physical address if no IOMMU is used) of the buffer. The DMA address is only used for buffers that are relocated, whereas the host1x driver will map gather buffers into its own IOVA space so that they can be processed by the CDMA engine. This approach has a couple of issues. On one hand it's not very useful to return a DMA address for the buffer if host1x doesn't need it. On the other hand, returning the SG table of the buffer is suboptimal because a single SG table cannot be shared for multiple mappings, because the DMA address is stored within the SG table, and the DMA address may be different for different devices. Subsequent patches will move the host1x driver over to the DMA API which doesn't work with a single shared SG table. Fix this by returning a new SG table each time a buffer is pinned. This allows the buffer to be referenced by multiple jobs for different engines. Change the prototypes of host1x_bo_pin() and host1x_bo_unpin() to take a struct device *, specifying the device for which the buffer should be pinned. This is required in order to be able to properly construct the SG table. While at it, make host1x_bo_pin() return the SG table because that allows us to return an ERR_PTR()-encoded error code if we need to, or return NULL to signal that we don't need the SG table to be remapped and can simply use the DMA address as-is. At the same time, returning the DMA address is made optional because in the example of command buffers, host1x doesn't need to know the DMA address since it will have to create its own mapping anyway. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index df6e613ba715..1ba23a6a2021 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -67,8 +67,9 @@ struct sg_table; struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); - dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt); - void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt); + struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo, + dma_addr_t *phys); + void (*unpin)(struct device *dev, struct sg_table *sgt); void *(*mmap)(struct host1x_bo *bo); void (*munmap)(struct host1x_bo *bo, void *addr); void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum); @@ -95,15 +96,17 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo, - struct sg_table **sgt) +static inline struct sg_table *host1x_bo_pin(struct device *dev, + struct host1x_bo *bo, + dma_addr_t *phys) { - return bo->ops->pin(bo, sgt); + return bo->ops->pin(dev, bo, phys); } -static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt) +static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo, + struct sg_table *sgt) { - bo->ops->unpin(bo, sgt); + bo->ops->unpin(dev, sgt); } static inline void *host1x_bo_mmap(struct host1x_bo *bo) -- cgit v1.2.3 From ab4f81bfc2a8d429130182f8ea3f29a8b1754931 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 28 Oct 2019 13:37:11 +0100 Subject: gpu: host1x: Add direction flags to relocations Add direction flags to host1x relocations performed during job pinning. These flags indicate the kinds of accesses that hardware is allowed to perform on the relocated buffers. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 1ba23a6a2021..6f8d772591ba 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -173,6 +173,9 @@ int host1x_job_submit(struct host1x_job *job); * host1x job */ +#define HOST1X_RELOC_READ (1 << 0) +#define HOST1X_RELOC_WRITE (1 << 1) + struct host1x_reloc { struct { struct host1x_bo *bo; @@ -183,6 +186,7 @@ struct host1x_reloc { unsigned long offset; } target; unsigned long shift; + unsigned long flags; }; struct host1x_job { -- cgit v1.2.3 From 771b53d033e8663abdf59704806aa856b236dcdb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 22 Oct 2019 10:25:58 -0600 Subject: io-wq: small threadpool implementation for io_uring This adds support for io-wq, a smaller and specialized thread pool implementation. This is meant to replace workqueues for io_uring. Among the reasons for this addition are: - We can assign memory context smarter and more persistently if we manage the life time of threads. - We can drop various work-arounds we have in io_uring, like the async_list. - We can implement hashed work insertion, to manage concurrency of buffered writes without needing a) an extra workqueue, or b) needlessly making the concurrency of said workqueue very low which hurts performance of multiple buffered file writers. - We can implement cancel through signals, for cancelling interruptible work like read/write (or send/recv) to/from sockets. - We need the above cancel for being able to assign and use file tables from a process. - We can implement a more thorough cancel operation in general. - We need it to move towards a syslet/threadlet model for even faster async execution. For that we need to take ownership of the used threads. This list is just off the top of my head. Performance should be the same, or better, at least that's what I've seen in my testing. io-wq supports basic NUMA functionality, setting up a pool per node. io-wq hooks up to the scheduler schedule in/out just like workqueue and uses that to drive the need for more/less workers. Acked-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..6666e25606b7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1468,6 +1468,7 @@ extern struct pid *cad_pid; #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ +#define PF_IO_WORKER 0x20000000 /* Task is an IO worker */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ -- cgit v1.2.3 From de2ea4b64b75a79ed9cdf9bf30e0e197901084e4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 17 Oct 2019 14:41:29 -0600 Subject: net: add __sys_accept4_file() helper This is identical to __sys_accept4(), except it takes a struct file instead of an fd, and it also allows passing in extra file->f_flags flags. The latter is done to support masking in O_NONBLOCK without manipulating the original file flags. No functional changes in this patch. Cc: netdev@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/socket.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index fc0bed59fc84..dd061f741bc1 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -392,6 +392,9 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); +extern int __sys_accept4_file(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); -- cgit v1.2.3 From b4941adb24c0676f77ddc25e6d7836b8245c47fc Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Thu, 24 Oct 2019 17:26:42 +0800 Subject: PM: wakeup: Add routine to help fetch wakeup source object. Some user might want to go through all registered wakeup sources and doing things accordingly. For example, SoC PM driver might need to do HW programming to prevent powering down specific IP which wakeup source depending on. So add this API to help walk through all registered wakeup source objects on that list and return them one by one. Signed-off-by: Ran Wang Tested-by: Leonard Crestez Reviewed-by: Rafael J. Wysocki Signed-off-by: Li Yang --- include/linux/pm_wakeup.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 661efa029c96..aa3da6611533 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -63,6 +63,11 @@ struct wakeup_source { bool autosleep_enabled:1; }; +#define for_each_wakeup_source(ws) \ + for ((ws) = wakeup_sources_walk_start(); \ + (ws); \ + (ws) = wakeup_sources_walk_next((ws))) + #ifdef CONFIG_PM_SLEEP /* @@ -92,6 +97,10 @@ extern void wakeup_source_remove(struct wakeup_source *ws); extern struct wakeup_source *wakeup_source_register(struct device *dev, const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); +extern int wakeup_sources_read_lock(void); +extern void wakeup_sources_read_unlock(int idx); +extern struct wakeup_source *wakeup_sources_walk_start(void); +extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); -- cgit v1.2.3 From 8e6af017f4b1da9cdd2b55ce83853df8e167b4d3 Mon Sep 17 00:00:00 2001 From: Ethan Hansen <1ethanhansen@gmail.com> Date: Fri, 2 Aug 2019 13:37:58 -0700 Subject: rcu: Remove unused function hlist_bl_del_init_rcu() The function hlist_bl_del_init_rcu() is declared in rculist_bl.h, but never used. This commit therefore removes it. Signed-off-by: Ethan Hansen <1ethanhansen@gmail.com> Signed-off-by: Paul E. McKenney --- include/linux/rculist_bl.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h index 66e73ec1aa99..0b952d06eb0b 100644 --- a/include/linux/rculist_bl.h +++ b/include/linux/rculist_bl.h @@ -24,34 +24,6 @@ static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h) ((unsigned long)rcu_dereference_check(h->first, hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK); } -/** - * hlist_bl_del_init_rcu - deletes entry from hash list with re-initialization - * @n: the element to delete from the hash list. - * - * Note: hlist_bl_unhashed() on the node returns true after this. It is - * useful for RCU based read lockfree traversal if the writer side - * must know if the list entry is still hashed or already unhashed. - * - * In particular, it means that we can not poison the forward pointers - * that may still be used for walking the hash list and we can only - * zero the pprev pointer so list_unhashed() will return true after - * this. - * - * The caller must take whatever precautions are necessary (such as - * holding appropriate locks) to avoid racing with another - * list-mutation primitive, such as hlist_bl_add_head_rcu() or - * hlist_bl_del_rcu(), running on this same list. However, it is - * perfectly legal to run concurrently with the _rcu list-traversal - * primitives, such as hlist_bl_for_each_entry_rcu(). - */ -static inline void hlist_bl_del_init_rcu(struct hlist_bl_node *n) -{ - if (!hlist_bl_unhashed(n)) { - __hlist_bl_del(n); - n->pprev = NULL; - } -} - /** * hlist_bl_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. -- cgit v1.2.3 From a63fc6b75cca984c71f095282e0227a390ba88f3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 23 Sep 2019 15:05:11 -0700 Subject: rcu: Upgrade rcu_swap_protected() to rcu_replace_pointer() Although the rcu_swap_protected() macro follows the example of swap(), the interactions with RCU make its update of its argument somewhat counter-intuitive. This commit therefore introduces an rcu_replace_pointer() that returns the old value of the RCU pointer instead of doing the argument update. Once all the uses of rcu_swap_protected() are updated to instead use rcu_replace_pointer(), rcu_swap_protected() will be removed. Link: https://lore.kernel.org/lkml/CAHk-=wiAsJLw1egFEE=Z7-GGtM6wcvtyytXZA1+BHqta4gg6Hw@mail.gmail.com/ Reported-by: Linus Torvalds [ paulmck: From rcu_replace() to rcu_replace_pointer() per Ingo Molnar. ] Signed-off-by: Paul E. McKenney Cc: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Shane M Seymour Cc: Martin K. Petersen --- include/linux/rcupdate.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 75a2eded7aa2..185dd9736863 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -382,6 +382,24 @@ do { \ smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ } while (0) +/** + * rcu_replace_pointer() - replace an RCU pointer, returning its old value + * @rcu_ptr: RCU pointer, whose old value is returned + * @ptr: regular pointer + * @c: the lockdep conditions under which the dereference will take place + * + * Perform a replacement, where @rcu_ptr is an RCU-annotated + * pointer and @c is the lockdep argument that is passed to the + * rcu_dereference_protected() call used to read that pointer. The old + * value of @rcu_ptr is returned, and @rcu_ptr is set to @ptr. + */ +#define rcu_replace_pointer(rcu_ptr, ptr, c) \ +({ \ + typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c)); \ + rcu_assign_pointer((rcu_ptr), (ptr)); \ + __tmp; \ +}) + /** * rcu_swap_protected() - swap an RCU and a regular pointer * @rcu_ptr: RCU pointer -- cgit v1.2.3 From a445e940ea686fc60475564009821010eb213be3 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 30 Oct 2019 10:13:13 +0000 Subject: dma-mapping: fix handling of dma-ranges for reserved memory (again) Daniele reported that issue previously fixed in c41f9ea998f3 ("drivers: dma-coherent: Account dma_pfn_offset when used with device tree") reappear shortly after 43fc509c3efb ("dma-coherent: introduce interface for default DMA pool") where fix was accidentally dropped. Lets put fix back in place and respect dma-ranges for reserved memory. Fixes: 43fc509c3efb ("dma-coherent: introduce interface for default DMA pool") Reported-by: Daniele Alessandrelli Tested-by: Daniele Alessandrelli Tested-by: Alexandre Torgue Signed-off-by: Vladimir Murzin Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4a1c4fca475a..10918c55003f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -162,7 +162,7 @@ int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, size_t size, int *ret); -void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle); +void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle); int dma_release_from_global_coherent(int order, void *vaddr); int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, size_t size, int *ret); @@ -172,7 +172,7 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, #define dma_release_from_dev_coherent(dev, order, vaddr) (0) #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) -static inline void *dma_alloc_from_global_coherent(ssize_t size, +static inline void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle) { return NULL; -- cgit v1.2.3 From 4544b9f25e70eae9f70a243de0cc802aa5c8cb69 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 29 Oct 2019 14:34:22 -0700 Subject: dma-mapping: Add vmap checks to dma_map_single() As we've seen from USB and other areas[1], we need to always do runtime checks for DMA operating on memory regions that might be remapped. This adds vmap checks (similar to those already in USB but missing in other places) into dma_map_single() so all callers benefit from the checking. [1] https://git.kernel.org/linus/3840c5b78803b2b6cc1ff820100a74a092c40cbb Suggested-by: Laura Abbott Signed-off-by: Kees Cook [hch: fixed the printk message] Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 10918c55003f..4d450672b7d6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -583,6 +583,10 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { + /* DMA must never operate on areas that might be remapped. */ + if (dev_WARN_ONCE(dev, is_vmalloc_addr(ptr), + "rejecting DMA map of vmalloc memory\n")) + return DMA_MAPPING_ERROR; debug_dma_map_single(dev, ptr, size); return dma_map_page_attrs(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, attrs); -- cgit v1.2.3 From fae7d7d5f374eadbb0b5dd31b39162e7176e9c3d Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 30 Oct 2019 16:29:54 -0400 Subject: Revert "dma-buf: Add dma-buf heaps framework" This reverts commit a69b0e855d3fd278ff6f09a23e1edf929538e304. This patchset doesn't meet the UAPI requirements set out in [1] for the DRM subsystem. Once the userspace component is reviewed and ready for merge we can try again. [1]- https://01.org/linuxgraphics/gfx-docs/drm/gpu/drm-uapi.html#open-source-userspace-requirements Fixes: a69b0e855d3f ("dma-buf: Add dma-buf heaps framework") Cc: Laura Abbott Cc: Benjamin Gaignard Cc: Sumit Semwal Cc: Liam Mark Cc: Pratik Patel Cc: Brian Starkey Cc: Vincent Donnefort Cc: Sudipto Paul Cc: Andrew F. Davis Cc: Christoph Hellwig Cc: Chenbo Feng Cc: Alistair Strachan Cc: Hridya Valsaraju Cc: Hillf Danton Cc: dri-devel@lists.freedesktop.org Cc: Brian Starkey Cc: John Stultz Cc: Mauro Carvalho Chehab Cc: "David S. Miller" Cc: Greg Kroah-Hartman Cc: Rob Herring Cc: Jonathan Cameron Cc: "Paul E. McKenney" Cc: Sean Paul Cc: "Andrew F. Davis" Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Acked-by: David Airlie Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20191030203003.101156-6-sean@poorly.run --- include/linux/dma-heap.h | 59 ------------------------------------------------ 1 file changed, 59 deletions(-) delete mode 100644 include/linux/dma-heap.h (limited to 'include/linux') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h deleted file mode 100644 index 454e354d1ffb..000000000000 --- a/include/linux/dma-heap.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * DMABUF Heaps Allocation Infrastructure - * - * Copyright (C) 2011 Google, Inc. - * Copyright (C) 2019 Linaro Ltd. - */ - -#ifndef _DMA_HEAPS_H -#define _DMA_HEAPS_H - -#include -#include - -struct dma_heap; - -/** - * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return fd - * - * allocate returns dmabuf fd on success, -errno on error. - */ -struct dma_heap_ops { - int (*allocate)(struct dma_heap *heap, - unsigned long len, - unsigned long fd_flags, - unsigned long heap_flags); -}; - -/** - * struct dma_heap_export_info - information needed to export a new dmabuf heap - * @name: used for debugging/device-node name - * @ops: ops struct for this heap - * @priv: heap exporter private data - * - * Information needed to export a new dmabuf heap. - */ -struct dma_heap_export_info { - const char *name; - const struct dma_heap_ops *ops; - void *priv; -}; - -/** - * dma_heap_get_drvdata() - get per-heap driver data - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-heap data for the heap. - */ -void *dma_heap_get_drvdata(struct dma_heap *heap); - -/** - * dma_heap_add - adds a heap to dmabuf heaps - * @exp_info: information needed to register this heap - */ -struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); - -#endif /* _DMA_HEAPS_H */ -- cgit v1.2.3 From 711b2bfba748b5adf8ad837b490b393197279203 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 24 Oct 2019 00:38:49 +0200 Subject: phy: add PHY_MODE_LVDS There are combo phys out there that can be switched between doing dsi and lvds. So add a mode definition for it. Signed-off-by: Heiko Stuebner Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 15032f145063..56d3a100006a 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -38,7 +38,8 @@ enum phy_mode { PHY_MODE_PCIE, PHY_MODE_ETHERNET, PHY_MODE_MIPI_DPHY, - PHY_MODE_SATA + PHY_MODE_SATA, + PHY_MODE_LVDS, }; /** -- cgit v1.2.3 From 22a6564f716b0746b5a05add3f9f37549f89244e Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 12 Sep 2019 16:26:05 +0300 Subject: clk: ti: clkctrl: convert to use bit helper macros instead of bitops This improves the readibility of the code slightly, and makes modifying the flags bit simpler. Signed-off-by: Tero Kristo --- include/linux/clk/ti.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 1e8ef96555ce..bb2c5af9082a 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -153,7 +153,7 @@ struct clk_hw_omap { u8 fixed_div; struct clk_omap_reg enable_reg; u8 enable_bit; - u8 flags; + unsigned long flags; struct clk_omap_reg clksel_reg; struct dpll_data *dpll_data; const char *clkdm_name; -- cgit v1.2.3 From 2209b72d41993c13de220b82c830b482925322b9 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 12 Sep 2019 16:26:06 +0300 Subject: clk: ti: clkctrl: add new exported API for checking standby info Standby status is provided for certain clkctrl clocks to see if the given module has entered standby or not. This is mostly needed by remoteproc code to see if the remoteproc has entered standby and the clock can be turned off safely. Signed-off-by: Tero Kristo --- include/linux/clk/ti.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index bb2c5af9082a..c62f6fa6763d 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -298,6 +298,7 @@ struct ti_clk_features { void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); +bool ti_clk_is_in_standby(struct clk *clk); int omap3_noncore_dpll_save_context(struct clk_hw *hw); void omap3_noncore_dpll_restore_context(struct clk_hw *hw); -- cgit v1.2.3 From f1b9509c2fb0ef4db8d22dac9aef8e856a5d81f6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 30 Oct 2019 15:32:11 -0700 Subject: bpf: Replace prog_raw_tp+btf_id with prog_tracing The bpf program type raw_tp together with 'expected_attach_type' was the most appropriate api to indicate BTF-enabled raw_tp programs. But during development it became apparent that 'expected_attach_type' cannot be used and new 'attach_btf_id' field had to be introduced. Which means that the information is duplicated in two fields where one of them is ignored. Clean it up by introducing new program type where both 'expected_attach_type' and 'attach_btf_id' fields have specific meaning. In the future 'expected_attach_type' will be extended with other attach points that have similar semantics to raw_tp. This patch is replacing BTF-enabled BPF_PROG_TYPE_RAW_TRACEPOINT with prog_type = BPF_RPOG_TYPE_TRACING expected_attach_type = BPF_TRACE_RAW_TP attach_btf_id = btf_id of raw tracepoint inside the kernel Future patches will add expected_attach_type = BPF_TRACE_FENTRY or BPF_TRACE_FEXIT where programs have the same input context and the same helpers, but different attach points. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191030223212.953010-2-ast@kernel.org --- include/linux/bpf.h | 5 +++++ include/linux/bpf_types.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 171be30fe0ae..80158cff44bd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -373,6 +373,11 @@ enum bpf_cgroup_storage_type { #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +/* The longest tracepoint has 12 args. + * See include/trace/bpf_probe.h + */ +#define MAX_BPF_FUNC_ARGS 12 + struct bpf_prog_stats { u64 cnt; u64 nsecs; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 36a9c2325176..de14872b01ba 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -26,6 +26,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) -- cgit v1.2.3 From f94df9890e98f2090c6a8d70c795134863b70201 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Sep 2019 16:07:45 +0100 Subject: Add wake_up_interruptible_sync_poll_locked() Add a wakeup call for a case whereby the caller already has the waitqueue spinlock held. This can be used by pipes to alter the ring buffer indices and issue a wakeup under the same spinlock. Signed-off-by: David Howells Acked-by: Peter Zijlstra (Intel) --- include/linux/wait.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index bb7676d396cd..3283c8d02137 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -202,6 +202,7 @@ void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, vo void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key, wait_queue_entry_t *bookmark); void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); +void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); @@ -229,6 +230,8 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); __wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m)) #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) +#define wake_up_interruptible_sync_poll_locked(x, m) \ + __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) #define ___wait_cond_timeout(condition) \ ({ \ -- cgit v1.2.3 From 8cefc107ca54c8b06438b7dc9cc08bc0a11d5b98 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Nov 2019 13:30:32 +0000 Subject: pipe: Use head and tail pointers for the ring, not cursor and length Convert pipes to use head and tail pointers for the buffer ring rather than pointer and length as the latter requires two atomic ops to update (or a combined op) whereas the former only requires one. (1) The head pointer is the point at which production occurs and points to the slot in which the next buffer will be placed. This is equivalent to pipe->curbuf + pipe->nrbufs. The head pointer belongs to the write-side. (2) The tail pointer is the point at which consumption occurs. It points to the next slot to be consumed. This is equivalent to pipe->curbuf. The tail pointer belongs to the read-side. (3) head and tail are allowed to run to UINT_MAX and wrap naturally. They are only masked off when the array is being accessed, e.g.: pipe->bufs[head & mask] This means that it is not necessary to have a dead slot in the ring as head == tail isn't ambiguous. (4) The ring is empty if "head == tail". A helper, pipe_empty(), is provided for this. (5) The occupancy of the ring is "head - tail". A helper, pipe_occupancy(), is provided for this. (6) The number of free slots in the ring is "pipe->ring_size - occupancy". A helper, pipe_space_for_user() is provided to indicate how many slots userspace may use. (7) The ring is full if "head - tail >= pipe->ring_size". A helper, pipe_full(), is provided for this. Signed-off-by: David Howells --- include/linux/pipe_fs_i.h | 60 +++++++++++++++++++++++++++++++++++++++++++---- include/linux/uio.h | 4 ++-- 2 files changed, 58 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5c626fdc10db..96158ca80456 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -30,9 +30,9 @@ struct pipe_buffer { * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing * @wait: reader/writer wait point in case of empty/full pipe - * @nrbufs: the number of non-empty pipe buffers in this pipe - * @buffers: total number of buffers (should be a power of 2) - * @curbuf: the current pipe buffer entry + * @head: The point of buffer production + * @tail: The point of buffer consumption + * @ring_size: total number of buffers (should be a power of 2) * @tmp_page: cached released page * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe @@ -48,7 +48,9 @@ struct pipe_buffer { struct pipe_inode_info { struct mutex mutex; wait_queue_head_t wait; - unsigned int nrbufs, curbuf, buffers; + unsigned int head; + unsigned int tail; + unsigned int ring_size; unsigned int readers; unsigned int writers; unsigned int files; @@ -104,6 +106,56 @@ struct pipe_buf_operations { bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; +/** + * pipe_empty - Return true if the pipe is empty + * @head: The pipe ring head pointer + * @tail: The pipe ring tail pointer + */ +static inline bool pipe_empty(unsigned int head, unsigned int tail) +{ + return head == tail; +} + +/** + * pipe_occupancy - Return number of slots used in the pipe + * @head: The pipe ring head pointer + * @tail: The pipe ring tail pointer + */ +static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) +{ + return head - tail; +} + +/** + * pipe_full - Return true if the pipe is full + * @head: The pipe ring head pointer + * @tail: The pipe ring tail pointer + * @limit: The maximum amount of slots available. + */ +static inline bool pipe_full(unsigned int head, unsigned int tail, + unsigned int limit) +{ + return pipe_occupancy(head, tail) >= limit; +} + +/** + * pipe_space_for_user - Return number of slots available to userspace + * @head: The pipe ring head pointer + * @tail: The pipe ring tail pointer + * @pipe: The pipe info structure + */ +static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail, + struct pipe_inode_info *pipe) +{ + unsigned int p_occupancy, p_space; + + p_occupancy = pipe_occupancy(head, tail); + if (p_occupancy >= pipe->ring_size) + return 0; + p_space = pipe->ring_size - p_occupancy; + return p_space; +} + /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to diff --git a/include/linux/uio.h b/include/linux/uio.h index ab5f523bc0df..9576fd8158d7 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -45,8 +45,8 @@ struct iov_iter { union { unsigned long nr_segs; struct { - int idx; - int start_idx; + unsigned int head; + unsigned int start_head; }; }; }; -- cgit v1.2.3 From 246880958ac93989c97c73ae1e60b78b4c4c88c5 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Thu, 31 Oct 2019 15:38:50 +0530 Subject: firmware: broadcom: add OP-TEE based BNXT f/w manager This driver registers on TEE bus to interact with OP-TEE based BNXT firmware management modules Cc: Jakub Kicinski Reported-by: kbuild test robot Signed-off-by: Vikas Gupta Signed-off-by: Sheetal Tigadoli Signed-off-by: David S. Miller --- include/linux/firmware/broadcom/tee_bnxt_fw.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/firmware/broadcom/tee_bnxt_fw.h (limited to 'include/linux') diff --git a/include/linux/firmware/broadcom/tee_bnxt_fw.h b/include/linux/firmware/broadcom/tee_bnxt_fw.h new file mode 100644 index 000000000000..f24c82d6ef73 --- /dev/null +++ b/include/linux/firmware/broadcom/tee_bnxt_fw.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +/* + * Copyright 2019 Broadcom. + */ + +#ifndef _BROADCOM_TEE_BNXT_FW_H +#define _BROADCOM_TEE_BNXT_FW_H + +#include + +int tee_bnxt_fw_load(void); +int tee_bnxt_copy_coredump(void *buf, u32 offset, u32 size); + +#endif /* _BROADCOM_TEE_BNXT_FW_H */ -- cgit v1.2.3 From df4bb5d128e2c44848aeb36b7ceceba3ac85080d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 31 Oct 2019 10:39:20 +0000 Subject: quota: Check that quota is not dirty before release There is a race window where quota was redirted once we drop dq_list_lock inside dqput(), but before we grab dquot->dq_lock inside dquot_release() TASK1 TASK2 (chowner) ->dqput() we_slept: spin_lock(&dq_list_lock) if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->write_dquot(dquot); goto we_slept if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); dquot->dq_sb->dq_op->release_dquot(dquot); dqget() mark_dquot_dirty() dqput() goto we_slept; } So dquot dirty quota will be released by TASK1, but on next we_sleept loop we detect this and call ->write_dquot() for it. XFSTEST: https://github.com/dmonakhov/xfstests/commit/440a80d4cbb39e9234df4d7240aee1d551c36107 Link: https://lore.kernel.org/r/20191031103920.3919-2-dmonakhov@openvz.org CC: stable@vger.kernel.org Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- include/linux/quotaops.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 185d94829701..91e0b7624053 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -54,6 +54,16 @@ static inline struct dquot *dqgrab(struct dquot *dquot) atomic_inc(&dquot->dq_count); return dquot; } + +static inline bool dquot_is_busy(struct dquot *dquot) +{ + if (test_bit(DQ_MOD_B, &dquot->dq_flags)) + return true; + if (atomic_read(&dquot->dq_count) > 1) + return true; + return false; +} + void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), -- cgit v1.2.3 From 1ac210d128ef6e92698dd3aa4e2e03e831bc9906 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 31 Oct 2019 01:18:29 +0200 Subject: bus: fsl-mc: add the fsl_mc_get_endpoint function Using the newly added fsl_mc_get_endpoint function a fsl-mc driver can find its associated endpoint (another object at the other link of a MC firmware link). The API will be used in the following patch in order to discover the connected DPMAC object of a DPNI. Also, the fsl_mc_device_lookup function is made available to the entire fsl-mc bus driver and not just for the dprc driver. Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- include/linux/fsl/mc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 975553a9f75d..54d9436600c7 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -403,6 +403,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev); void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev); + extern struct bus_type fsl_mc_bus_type; extern struct device_type fsl_mc_bus_dprc_type; -- cgit v1.2.3 From cec0cb8a28f9060367099beeafd0dbdb76fdfae2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Oct 2019 12:41:10 -0700 Subject: crypto: skcipher - remove crypto_has_ablkcipher() crypto_has_ablkcipher() has no users, and it does the same thing as crypto_has_skcipher() anyway. So remove it. This also removes the last user of crypto_skcipher_type() and crypto_skcipher_mask(), so remove those too. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 19ea3a371d7b..b7855743f7e3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -900,20 +900,6 @@ static inline struct crypto_ablkcipher *__crypto_ablkcipher_cast( return (struct crypto_ablkcipher *)tfm; } -static inline u32 crypto_skcipher_type(u32 type) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_BLKCIPHER; - return type; -} - -static inline u32 crypto_skcipher_mask(u32 mask) -{ - mask &= ~CRYPTO_ALG_TYPE_MASK; - mask |= CRYPTO_ALG_TYPE_BLKCIPHER_MASK; - return mask; -} - /** * DOC: Asynchronous Block Cipher API * @@ -959,23 +945,6 @@ static inline void crypto_free_ablkcipher(struct crypto_ablkcipher *tfm) crypto_free_tfm(crypto_ablkcipher_tfm(tfm)); } -/** - * crypto_has_ablkcipher() - Search for the availability of an ablkcipher. - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * ablkcipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Return: true when the ablkcipher is known to the kernel crypto API; false - * otherwise - */ -static inline int crypto_has_ablkcipher(const char *alg_name, u32 type, - u32 mask) -{ - return crypto_has_alg(alg_name, crypto_skcipher_type(type), - crypto_skcipher_mask(mask)); -} - static inline struct ablkcipher_tfm *crypto_ablkcipher_crt( struct crypto_ablkcipher *tfm) { -- cgit v1.2.3 From c65058b7587fd3d001c57a50285477be521f5350 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Oct 2019 12:41:12 -0700 Subject: crypto: skcipher - remove the "blkcipher" algorithm type Now that all "blkcipher" algorithms have been converted to "skcipher", remove the blkcipher algorithm type. The skcipher (symmetric key cipher) algorithm type was introduced a few years ago to replace both blkcipher and ablkcipher (synchronous and asynchronous block cipher). The advantages of skcipher include: - A much less confusing name, since none of these algorithm types have ever actually been for raw block ciphers, but rather for all length-preserving encryption modes including block cipher modes of operation, stream ciphers, and other length-preserving modes. - It unified blkcipher and ablkcipher into a single algorithm type which supports both synchronous and asynchronous implementations. Note, blkcipher already operated only on scatterlists, so the fact that skcipher does too isn't a regression in functionality. - Better type safety by using struct skcipher_alg, struct crypto_skcipher, etc. instead of crypto_alg, crypto_tfm, etc. - It sometimes simplifies the implementations of algorithms. Also, the blkcipher API was no longer being tested. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/crypto.h | 395 +------------------------------------------------ 1 file changed, 2 insertions(+), 393 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b7855743f7e3..e9f2c6b5d800 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -41,7 +41,6 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 -#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_KPP 0x00000008 @@ -55,7 +54,6 @@ #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e -#define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c #define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 @@ -141,7 +139,6 @@ struct scatterlist; struct crypto_ablkcipher; struct crypto_async_request; -struct crypto_blkcipher; struct crypto_tfm; struct crypto_type; @@ -176,12 +173,6 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -struct blkcipher_desc { - struct crypto_blkcipher *tfm; - void *info; - u32 flags; -}; - /** * DOC: Block Cipher Algorithm Definitions * @@ -240,32 +231,6 @@ struct ablkcipher_alg { unsigned int ivsize; }; -/** - * struct blkcipher_alg - synchronous block cipher definition - * @min_keysize: see struct ablkcipher_alg - * @max_keysize: see struct ablkcipher_alg - * @setkey: see struct ablkcipher_alg - * @encrypt: see struct ablkcipher_alg - * @decrypt: see struct ablkcipher_alg - * @ivsize: see struct ablkcipher_alg - * - * All fields except @ivsize are mandatory and must be filled. - */ -struct blkcipher_alg { - int (*setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes); - int (*decrypt)(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes); - - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; -}; - /** * struct cipher_alg - single-block symmetric ciphers definition * @cia_min_keysize: Minimum key size supported by the transformation. This is @@ -451,7 +416,6 @@ struct crypto_istat_rng { #endif /* CONFIG_CRYPTO_STATS */ #define cra_ablkcipher cra_u.ablkcipher -#define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_compress cra_u.compress @@ -499,9 +463,8 @@ struct crypto_istat_rng { * transformation algorithm. * @cra_type: Type of the cryptographic transformation. This is a pointer to * struct crypto_type, which implements callbacks common for all - * transformation types. There are multiple options: - * &crypto_blkcipher_type, &crypto_ablkcipher_type, - * &crypto_ahash_type, &crypto_rng_type. + * transformation types. There are multiple options, such as + * &crypto_skcipher_type, &crypto_ahash_type, &crypto_rng_type. * This field might be empty. In that case, there are no common * callbacks. This is the case for: cipher, compress, shash. * @cra_u: Callbacks implementing the transformation. This is a union of @@ -522,8 +485,6 @@ struct crypto_istat_rng { * @cra_init. * @cra_u.ablkcipher: Union member which contains an asynchronous block cipher * definition. See @struct @ablkcipher_alg. - * @cra_u.blkcipher: Union member which contains a synchronous block cipher - * definition See @struct @blkcipher_alg. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. * @cra_u.compress: Union member which contains a (de)compression algorithm. @@ -566,7 +527,6 @@ struct crypto_alg { union { struct ablkcipher_alg ablkcipher; - struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct compress_alg compress; } cra_u; @@ -727,16 +687,6 @@ struct ablkcipher_tfm { unsigned int reqsize; }; -struct blkcipher_tfm { - void *iv; - int (*setkey)(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); -}; - struct cipher_tfm { int (*cit_setkey)(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); @@ -754,7 +704,6 @@ struct compress_tfm { }; #define crt_ablkcipher crt_u.ablkcipher -#define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_compress crt_u.compress @@ -764,7 +713,6 @@ struct crypto_tfm { union { struct ablkcipher_tfm ablkcipher; - struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct compress_tfm compress; } crt_u; @@ -780,10 +728,6 @@ struct crypto_ablkcipher { struct crypto_tfm base; }; -struct crypto_blkcipher { - struct crypto_tfm base; -}; - struct crypto_cipher { struct crypto_tfm base; }; @@ -1232,341 +1176,6 @@ static inline void ablkcipher_request_set_crypt( req->info = iv; } -/** - * DOC: Synchronous Block Cipher API - * - * The synchronous block cipher API is used with the ciphers of type - * CRYPTO_ALG_TYPE_BLKCIPHER (listed as type "blkcipher" in /proc/crypto) - * - * Synchronous calls, have a context in the tfm. But since a single tfm can be - * used in multiple calls and in parallel, this info should not be changeable - * (unless a lock is used). This applies, for example, to the symmetric key. - * However, the IV is changeable, so there is an iv field in blkcipher_tfm - * structure for synchronous blkcipher api. So, its the only state info that can - * be kept for synchronous calls without using a big lock across a tfm. - * - * The block cipher API allows the use of a complete cipher, i.e. a cipher - * consisting of a template (a block chaining mode) and a single block cipher - * primitive (e.g. AES). - * - * The plaintext data buffer and the ciphertext data buffer are pointed to - * by using scatter/gather lists. The cipher operation is performed - * on all segments of the provided scatter/gather lists. - * - * The kernel crypto API supports a cipher operation "in-place" which means that - * the caller may provide the same scatter/gather list for the plaintext and - * cipher text. After the completion of the cipher operation, the plaintext - * data is replaced with the ciphertext data in case of an encryption and vice - * versa for a decryption. The caller must ensure that the scatter/gather lists - * for the output data point to sufficiently large buffers, i.e. multiples of - * the block size of the cipher. - */ - -static inline struct crypto_blkcipher *__crypto_blkcipher_cast( - struct crypto_tfm *tfm) -{ - return (struct crypto_blkcipher *)tfm; -} - -static inline struct crypto_blkcipher *crypto_blkcipher_cast( - struct crypto_tfm *tfm) -{ - BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_BLKCIPHER); - return __crypto_blkcipher_cast(tfm); -} - -/** - * crypto_alloc_blkcipher() - allocate synchronous block cipher handle - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * blkcipher cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Allocate a cipher handle for a block cipher. The returned struct - * crypto_blkcipher is the cipher handle that is required for any subsequent - * API invocation for that block cipher. - * - * Return: allocated cipher handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -static inline struct crypto_blkcipher *crypto_alloc_blkcipher( - const char *alg_name, u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_BLKCIPHER; - mask |= CRYPTO_ALG_TYPE_MASK; - - return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_blkcipher_tfm( - struct crypto_blkcipher *tfm) -{ - return &tfm->base; -} - -/** - * crypto_free_blkcipher() - zeroize and free the block cipher handle - * @tfm: cipher handle to be freed - */ -static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm) -{ - crypto_free_tfm(crypto_blkcipher_tfm(tfm)); -} - -/** - * crypto_has_blkcipher() - Search for the availability of a block cipher - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * block cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Return: true when the block cipher is known to the kernel crypto API; false - * otherwise - */ -static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_BLKCIPHER; - mask |= CRYPTO_ALG_TYPE_MASK; - - return crypto_has_alg(alg_name, type, mask); -} - -/** - * crypto_blkcipher_name() - return the name / cra_name from the cipher handle - * @tfm: cipher handle - * - * Return: The character string holding the name of the cipher - */ -static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm) -{ - return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm)); -} - -static inline struct blkcipher_tfm *crypto_blkcipher_crt( - struct crypto_blkcipher *tfm) -{ - return &crypto_blkcipher_tfm(tfm)->crt_blkcipher; -} - -static inline struct blkcipher_alg *crypto_blkcipher_alg( - struct crypto_blkcipher *tfm) -{ - return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher; -} - -/** - * crypto_blkcipher_ivsize() - obtain IV size - * @tfm: cipher handle - * - * The size of the IV for the block cipher referenced by the cipher handle is - * returned. This IV size may be zero if the cipher does not need an IV. - * - * Return: IV size in bytes - */ -static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm) -{ - return crypto_blkcipher_alg(tfm)->ivsize; -} - -/** - * crypto_blkcipher_blocksize() - obtain block size of cipher - * @tfm: cipher handle - * - * The block size for the block cipher referenced with the cipher handle is - * returned. The caller may use that information to allocate appropriate - * memory for the data returned by the encryption or decryption operation. - * - * Return: block size of cipher - */ -static inline unsigned int crypto_blkcipher_blocksize( - struct crypto_blkcipher *tfm) -{ - return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm)); -} - -static inline unsigned int crypto_blkcipher_alignmask( - struct crypto_blkcipher *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm)); -} - -static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm) -{ - return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm)); -} - -static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm, - u32 flags) -{ - crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags); -} - -static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm, - u32 flags) -{ - crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags); -} - -/** - * crypto_blkcipher_setkey() - set key for cipher - * @tfm: cipher handle - * @key: buffer holding the key - * @keylen: length of the key in bytes - * - * The caller provided key is set for the block cipher referenced by the cipher - * handle. - * - * Note, the key length determines the cipher type. Many block ciphers implement - * different cipher modes depending on the key size, such as AES-128 vs AES-192 - * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 - * is performed. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm, - const u8 *key, unsigned int keylen) -{ - return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm), - key, keylen); -} - -/** - * crypto_blkcipher_encrypt() - encrypt plaintext - * @desc: reference to the block cipher handle with meta data - * @dst: scatter/gather list that is filled by the cipher operation with the - * ciphertext - * @src: scatter/gather list that holds the plaintext - * @nbytes: number of bytes of the plaintext to encrypt. - * - * Encrypt plaintext data using the IV set by the caller with a preceding - * call of crypto_blkcipher_set_iv. - * - * The blkcipher_desc data structure must be filled by the caller and can - * reside on the stack. The caller must fill desc as follows: desc.tfm is filled - * with the block cipher handle; desc.flags is filled with either - * CRYPTO_TFM_REQ_MAY_SLEEP or 0. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - desc->info = crypto_blkcipher_crt(desc->tfm)->iv; - return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); -} - -/** - * crypto_blkcipher_encrypt_iv() - encrypt plaintext with dedicated IV - * @desc: reference to the block cipher handle with meta data - * @dst: scatter/gather list that is filled by the cipher operation with the - * ciphertext - * @src: scatter/gather list that holds the plaintext - * @nbytes: number of bytes of the plaintext to encrypt. - * - * Encrypt plaintext data with the use of an IV that is solely used for this - * cipher operation. Any previously set IV is not used. - * - * The blkcipher_desc data structure must be filled by the caller and can - * reside on the stack. The caller must fill desc as follows: desc.tfm is filled - * with the block cipher handle; desc.info is filled with the IV to be used for - * the current operation; desc.flags is filled with either - * CRYPTO_TFM_REQ_MAY_SLEEP or 0. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); -} - -/** - * crypto_blkcipher_decrypt() - decrypt ciphertext - * @desc: reference to the block cipher handle with meta data - * @dst: scatter/gather list that is filled by the cipher operation with the - * plaintext - * @src: scatter/gather list that holds the ciphertext - * @nbytes: number of bytes of the ciphertext to decrypt. - * - * Decrypt ciphertext data using the IV set by the caller with a preceding - * call of crypto_blkcipher_set_iv. - * - * The blkcipher_desc data structure must be filled by the caller as documented - * for the crypto_blkcipher_encrypt call above. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - * - */ -static inline int crypto_blkcipher_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - desc->info = crypto_blkcipher_crt(desc->tfm)->iv; - return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); -} - -/** - * crypto_blkcipher_decrypt_iv() - decrypt ciphertext with dedicated IV - * @desc: reference to the block cipher handle with meta data - * @dst: scatter/gather list that is filled by the cipher operation with the - * plaintext - * @src: scatter/gather list that holds the ciphertext - * @nbytes: number of bytes of the ciphertext to decrypt. - * - * Decrypt ciphertext data with the use of an IV that is solely used for this - * cipher operation. Any previously set IV is not used. - * - * The blkcipher_desc data structure must be filled by the caller as documented - * for the crypto_blkcipher_encrypt_iv call above. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); -} - -/** - * crypto_blkcipher_set_iv() - set IV for cipher - * @tfm: cipher handle - * @src: buffer holding the IV - * @len: length of the IV in bytes - * - * The caller provided IV is set for the block cipher referenced by the cipher - * handle. - */ -static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm, - const u8 *src, unsigned int len) -{ - memcpy(crypto_blkcipher_crt(tfm)->iv, src, len); -} - -/** - * crypto_blkcipher_get_iv() - obtain IV from cipher - * @tfm: cipher handle - * @dst: buffer filled with the IV - * @len: length of the buffer dst - * - * The caller can obtain the IV set for the block cipher referenced by the - * cipher handle and store it into the user-provided buffer. If the buffer - * has an insufficient space, the IV is truncated to fit the buffer. - */ -static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm, - u8 *dst, unsigned int len) -{ - memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len); -} - /** * DOC: Single Block Cipher API * -- cgit v1.2.3 From 8b5369ea580964dbc982781bfb9fb93459fc5e8d Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 14 Oct 2019 20:31:03 +0200 Subject: dma/direct: turn ARCH_ZONE_DMA_BITS into a variable Some architectures, notably ARM, are interested in tweaking this depending on their runtime DMA addressing limitations. Acked-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- include/linux/dma-direct.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index adf993a3bd58..d03af3605460 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,6 +5,8 @@ #include #include +extern unsigned int zone_dma_bits; + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else -- cgit v1.2.3 From 73727f4dafa2df107e85753c5ab703a1f344e1f1 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 30 Oct 2019 16:43:10 +0100 Subject: livepatch: Basic API to track system state changes This is another step how to help maintaining more livepatches. One big help was the atomic replace and cumulative livepatches. These livepatches replace the already installed ones. Therefore it should be enough when each cumulative livepatch is consistent. The problems might come with shadow variables and callbacks. They might change the system behavior or state so that it is no longer safe to go back and use an older livepatch or the original kernel code. Also, a new livepatch must be able to detect changes which were made by the already installed livepatches. This is where the livepatch system state tracking gets useful. It allows to: - find whether a system state has already been modified by previous livepatches - store data needed to manipulate and restore the system state The information about the manipulated system states is stored in an array of struct klp_state. It can be searched by two new functions klp_get_state() and klp_get_prev_state(). The dependencies are going to be solved by a version field added later. The only important information is that it will be allowed to modify the same state by more non-cumulative livepatches. It is similar to allowing to modify the same function several times. The livepatch author is responsible for preventing incompatible changes. Link: http://lkml.kernel.org/r/20191030154313.13263-3-pmladek@suse.com To: Jiri Kosina Cc: Kamalesh Babulal Cc: Nicolai Stange Cc: live-patching@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Miroslav Benes Acked-by: Joe Lawrence Acked-by: Josh Poimboeuf Signed-off-by: Petr Mladek --- include/linux/livepatch.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 273400814020..726947338fd5 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -130,10 +130,21 @@ struct klp_object { bool patched; }; +/** + * struct klp_state - state of the system modified by the livepatch + * @id: system state identifier (non-zero) + * @data: custom data + */ +struct klp_state { + unsigned long id; + void *data; +}; + /** * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched + * @states: system states that can get modified * @replace: replace all actively used patches * @list: list node for global list of actively used patches * @kobj: kobject for sysfs resources @@ -147,6 +158,7 @@ struct klp_patch { /* external */ struct module *mod; struct klp_object *objs; + struct klp_state *states; bool replace; /* internal */ @@ -217,6 +229,9 @@ void *klp_shadow_get_or_alloc(void *obj, unsigned long id, void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor); void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor); +struct klp_state *klp_get_state(struct klp_patch *patch, unsigned long id); +struct klp_state *klp_get_prev_state(unsigned long id); + #else /* !CONFIG_LIVEPATCH */ static inline int klp_module_coming(struct module *mod) { return 0; } -- cgit v1.2.3 From 92c9abf5e57500ea7dc59a55273aa7850b631bda Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 30 Oct 2019 16:43:11 +0100 Subject: livepatch: Allow to distinguish different version of system state changes The atomic replace runs pre/post (un)install callbacks only from the new livepatch. There are several reasons for this: + Simplicity: clear ordering of operations, no interactions between old and new callbacks. + Reliability: only new livepatch knows what changes can already be made by older livepatches and how to take over the state. + Testing: the atomic replace can be properly tested only when a newer livepatch is available. It might be too late to fix unwanted effect of callbacks from older livepatches. It might happen that an older change is not enough and the same system state has to be modified another way. Different changes need to get distinguished by a version number added to struct klp_state. The version can also be used to prevent loading incompatible livepatches. The check is done when the livepatch is enabled. The rules are: + Any completely new system state modification is allowed. + System state modifications with the same or higher version are allowed for already modified system states. + Cumulative livepatches must handle all system state modifications from already installed livepatches. + Non-cumulative livepatches are allowed to touch already modified system states. Link: http://lkml.kernel.org/r/20191030154313.13263-4-pmladek@suse.com To: Jiri Kosina Cc: Kamalesh Babulal Cc: Nicolai Stange Cc: live-patching@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Miroslav Benes Acked-by: Joe Lawrence Acked-by: Josh Poimboeuf Signed-off-by: Petr Mladek --- include/linux/livepatch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 726947338fd5..e894e74905f3 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -133,10 +133,12 @@ struct klp_object { /** * struct klp_state - state of the system modified by the livepatch * @id: system state identifier (non-zero) + * @version: version of the change * @data: custom data */ struct klp_state { unsigned long id; + unsigned int version; void *data; }; -- cgit v1.2.3 From 626fb735a43ddcb7b2c58c27cb03b098acc03339 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 30 Oct 2019 00:59:30 +0800 Subject: blk-mq: Make blk_mq_run_hw_queue() return void Since commit 97889f9ac24f ("blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()"), the return value of blk_mq_run_hw_queue() is never checked, so make it return void, which very marginally simplifies the code. Reviewed-by: Bob Liu Signed-off-by: John Garry Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4919d22e1aff..dc03e059fdff 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -502,7 +502,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); -- cgit v1.2.3 From 25937580a5065d6fbd92d9c8ebd47145ad80052e Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 31 Oct 2019 10:59:44 +0100 Subject: ata: define AC_ERR_OK Since we will return enum ata_completion_errors from qc_prep in the next patch, let's define AC_ERR_OK to mark the OK status. Signed-off-by: Jiri Slaby Cc: Jens Axboe Cc: linux-ide@vger.kernel.org Signed-off-by: Jens Axboe --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 207e7ee764ce..b63ce4ebcd66 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -484,6 +484,7 @@ enum hsm_task_states { }; enum ata_completion_errors { + AC_ERR_OK = 0, /* no error */ AC_ERR_DEV = (1 << 0), /* device reported error */ AC_ERR_HSM = (1 << 1), /* host state machine violation */ AC_ERR_TIMEOUT = (1 << 2), /* timeout */ -- cgit v1.2.3 From 95364f36701e62dd50eee91e1303187fd1a9f567 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 31 Oct 2019 10:59:45 +0100 Subject: ata: make qc_prep return ata_completion_errors In case a driver wants to return an error from qc_prep, return enum ata_completion_errors. sata_mv is one of those drivers -- see the next patch. Other drivers return the newly defined AC_ERR_OK. [v2] use enum ata_completion_errors and AC_ERR_OK. Signed-off-by: Jiri Slaby Cc: Jens Axboe Cc: linux-ide@vger.kernel.org Signed-off-by: Jens Axboe --- include/linux/libata.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index b63ce4ebcd66..d3bbfddf616a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -892,9 +892,9 @@ struct ata_port_operations { /* * Command execution */ - int (*qc_defer)(struct ata_queued_cmd *qc); - int (*check_atapi_dma)(struct ata_queued_cmd *qc); - void (*qc_prep)(struct ata_queued_cmd *qc); + int (*qc_defer)(struct ata_queued_cmd *qc); + int (*check_atapi_dma)(struct ata_queued_cmd *qc); + enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); bool (*qc_fill_rtf)(struct ata_queued_cmd *qc); @@ -1162,7 +1162,7 @@ extern int ata_xfer_mode2shift(unsigned long xfer_mode); extern const char *ata_mode_string(unsigned long xfer_mask); extern unsigned long ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); -extern void ata_noop_qc_prep(struct ata_queued_cmd *qc); +extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); @@ -1894,9 +1894,9 @@ extern const struct ata_port_operations ata_bmdma_port_ops; .sg_tablesize = LIBATA_MAX_PRD, \ .dma_boundary = ATA_DMA_BOUNDARY -extern void ata_bmdma_qc_prep(struct ata_queued_cmd *qc); +extern enum ata_completion_errors ata_bmdma_qc_prep(struct ata_queued_cmd *qc); extern unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc); -extern void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc); +extern enum ata_completion_errors ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc); extern unsigned int ata_bmdma_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern irqreturn_t ata_bmdma_interrupt(int irq, void *dev_instance); -- cgit v1.2.3 From e53a9d26cf80565cfb7172fc52a0dfac73613a0f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:30 +0000 Subject: IB/mlx5: Introduce and use mlx5_core_is_vf() Instead of deciding a given device is virtual function or not based on a device is PF or not, use already defined MLX5_COREDEV_VF by introducing an helper API mlx5_core_is_vf(). This enables to clearly identify PF, VF and non virtual functions. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3e80f03a387f..7b4801e96feb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1121,6 +1121,11 @@ static inline bool mlx5_core_is_pf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_PF; } +static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_VF; +} + static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; -- cgit v1.2.3 From d817991cc7486ab83f6c7188b0bc80eebee872f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 1 Nov 2019 12:03:46 +0100 Subject: xsk: Restructure/inline XSKMAP lookup/redirect/flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this commit the XSKMAP entry lookup function used by the XDP redirect code is moved from the xskmap.c file to the xdp_sock.h header, so the lookup can be inlined from, e.g., the bpf_xdp_redirect_map() function. Further the __xsk_map_redirect() and __xsk_map_flush() is moved to the xsk.c, which lets the compiler inline the xsk_rcv() and xsk_flush() functions. Finally, all the XDP socket functions were moved from linux/bpf.h to net/xdp_sock.h, where most of the XDP sockets functions are anyway. This yields a ~2% performance boost for the xdpsock "rx_drop" scenario. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191101110346.15004-4-bjorn.topel@gmail.com --- include/linux/bpf.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 80158cff44bd..7c7f518811a6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1009,31 +1009,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, } #endif -#if defined(CONFIG_XDP_SOCKETS) -struct xdp_sock; -struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key); -int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, - struct xdp_sock *xs); -void __xsk_map_flush(struct bpf_map *map); -#else -struct xdp_sock; -static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, - u32 key) -{ - return NULL; -} - -static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, - struct xdp_sock *xs) -{ - return -EOPNOTSUPP; -} - -static inline void __xsk_map_flush(struct bpf_map *map) -{ -} -#endif - #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, -- cgit v1.2.3 From 05ef983e0d65a31b370a4e1b93c1efd490ae778f Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 28 Oct 2019 15:00:22 -0700 Subject: driver core: Add device link support for SYNC_STATE_ONLY flag Parent devices might need to create "proxy" device links from themselves to supplier devices to make sure the supplier devices don't get a sync_state() before the child consumer devices get a chance to add device links to the supplier devices. However, the parent device has no real dependency on the supplier device and probing, suspend/resume or runtime PM don't need to be affected by the supplier device. To capture these cases, create a SYNC_STATE_ONLY device link flag that only affects sync_state() behavior and doesn't affect probing, suspend/resume or runtime PM. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20191028220027.251605-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6978bb471567..82890e1b8f08 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1096,6 +1096,7 @@ enum device_link_state { * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. * MANAGED: The core tracks presence of supplier/consumer drivers (internal). + * SYNC_STATE_ONLY: Link only affects sync_state() behavior. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) @@ -1104,6 +1105,7 @@ enum device_link_state { #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) #define DL_FLAG_AUTOPROBE_CONSUMER BIT(5) #define DL_FLAG_MANAGED BIT(6) +#define DL_FLAG_SYNC_STATE_ONLY BIT(7) /** * struct device_link - Device link representation. -- cgit v1.2.3 From bcbbcfd57247f4c2976055995e5760fb576aae1e Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 28 Oct 2019 15:00:23 -0700 Subject: driver core: Allow a device to wait on optional suppliers Before this change, if a device is waiting on suppliers, it's assumed that all those suppliers are needed for the device to probe successfully. This change allows marking a devices as waiting only on optional suppliers. This allows a device to wait on suppliers (and link to them as soon as they are available) without preventing the device from being probed. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20191028220027.251605-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 82890e1b8f08..d1bcc8f122f6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1155,6 +1155,8 @@ enum dl_dev_state { * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. * @defer_sync: Hook to global list of devices that have deferred sync_state. + * @need_for_probe: If needs_suppliers is on a list, this indicates if the + * suppliers are needed for probe or not. * @status: Driver status information. */ struct dev_links_info { @@ -1162,6 +1164,7 @@ struct dev_links_info { struct list_head consumers; struct list_head needs_suppliers; struct list_head defer_sync; + bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From 03324507e66c7664c754b1ef92c5c3be24c78aa2 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 28 Oct 2019 15:00:24 -0700 Subject: driver core: Allow fwnode_operations.add_links to differentiate errors When add_links() still has suppliers that it needs to link to in the future, this patch allows it to differentiate between suppliers that are needed for probing vs suppliers that are needed for sync_state() correctness. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20191028220027.251605-4-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 97223e2410bd..766ff9bb5876 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -94,10 +94,15 @@ struct fwnode_reference_args { * available suppliers. * * Return 0 if device links have been successfully created to all - * the suppliers of this device or if the supplier information is - * not known. Return an error if and only if the supplier - * information is known but some of the suppliers are not yet - * available to create device links to. + * the suppliers this device needs to create device links to or if + * the supplier information is not known. + * + * Return -ENODEV if and only if the suppliers needed for probing + * the device are not yet available to create device links to. + * + * Return -EAGAIN if there are suppliers that need to be linked to + * that are not yet available but none of those suppliers are + * necessary for probing this device. */ struct fwnode_operations { struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); -- cgit v1.2.3 From c7c1168909410e692be6df17d0092363a00f33a9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 11 Oct 2019 15:29:28 +0200 Subject: debugfs: remove return value of debugfs_create_x8() No one checks the return value of debugfs_create_x8(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20191011132931.1186197-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 33690949b45d..c127c159d10a 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -107,8 +107,8 @@ void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); struct dentry *debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent, unsigned long *value); -struct dentry *debugfs_create_x8(const char *name, umode_t mode, - struct dentry *parent, u8 *value); +void debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, + u8 *value); void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent, @@ -268,12 +268,8 @@ static inline struct dentry *debugfs_create_ulong(const char *name, return ERR_PTR(-ENODEV); } -static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode, - struct dentry *parent, - u8 *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_x8(const char *name, umode_t mode, + struct dentry *parent, u8 *value) { } static inline void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { } -- cgit v1.2.3 From 1d1585ca0f48fe7ed95c3571f3e4a82b2b5045dc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Nov 2019 00:17:56 +0100 Subject: uaccess: Add non-pagefault user-space write function Commit 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions") missed to add probe write function, therefore factor out a probe_write_common() helper with most logic of probe_kernel_write() except setting KERNEL_DS, and add a new probe_user_write() helper so it can be used from BPF side. Again, on some archs, the user address space and kernel address space can co-exist and be overlapping, so in such case, setting KERNEL_DS would mean that the given address is treated as being in kernel address space. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: Masami Hiramatsu Link: https://lore.kernel.org/bpf/9df2542e68141bfa3addde631441ee45503856a8.1572649915.git.daniel@iogearbox.net --- include/linux/uaccess.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d4ee6e942562..38555435a64a 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -337,6 +337,18 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size); extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); +/* + * probe_user_write(): safely attempt to write to a location in user space + * @dst: address to write to + * @src: pointer to the data that shall be written + * @size: size of the data chunk + * + * Safely write to address @dst from the buffer at @src. If a kernel fault + * happens, handle that and return -EFAULT. + */ +extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size); + extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, long count); -- cgit v1.2.3 From 75a1a607bb7e6d918be3aca11ec2214a275392f4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Nov 2019 00:17:57 +0100 Subject: uaccess: Add strict non-pagefault kernel-space read function Add two new probe_kernel_read_strict() and strncpy_from_unsafe_strict() helpers which by default alias to the __probe_kernel_read() and the __strncpy_from_unsafe(), respectively, but can be overridden by archs which have non-overlapping address ranges for kernel space and user space in order to bail out with -EFAULT when attempting to probe user memory including non-canonical user access addresses [0]: 4-level page tables: user-space mem: 0x0000000000000000 - 0x00007fffffffffff non-canonical: 0x0000800000000000 - 0xffff7fffffffffff 5-level page tables: user-space mem: 0x0000000000000000 - 0x00ffffffffffffff non-canonical: 0x0100000000000000 - 0xfeffffffffffffff The idea is that these helpers are complementary to the probe_user_read() and strncpy_from_unsafe_user() which probe user-only memory. Both added helpers here do the same, but for kernel-only addresses. Both set of helpers are going to be used for BPF tracing. They also explicitly avoid throwing the splat for non-canonical user addresses from 00c42373d397 ("x86-64: add warning for non-canonical user access address dereferences"). For compat, the current probe_kernel_read() and strncpy_from_unsafe() are left as-is. [0] Documentation/x86/x86_64/mm.txt Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: x86@kernel.org Link: https://lore.kernel.org/bpf/eefeefd769aa5a013531f491a71f0936779e916b.1572649915.git.daniel@iogearbox.net --- include/linux/uaccess.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 38555435a64a..67f016010aad 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, * happens, handle that and return -EFAULT. */ extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long probe_kernel_read_strict(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* @@ -350,6 +351,9 @@ extern long notrace probe_user_write(void __user *dst, const void *src, size_t s extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, + long count); +extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, long count); extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); -- cgit v1.2.3 From c49cfc227e7f49e6011d6b955145814ce13424bc Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Fri, 1 Nov 2019 11:35:05 +0200 Subject: iio: imu: adis: assign value only if return code zero in read funcs The inline read functions in the ADIS library don't check the return value of the `adis_read_reg()` function and assign the value of `tmp` regardless. Fix this by checking if return value is zero and only then assigning the value of `tmp`. No known case of the callers of this function incorrectly using the value, but best to stop any potential risk here. Not suitable for stable due to no known actual bugs caused by this issue. Fixes: 57a1228a06b7a ("iio:imu:adis: Add support for 32bit registers") Signed-off-by: Alexandru Ardelean Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 4c53815bb729..92aae14593bf 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -129,7 +129,8 @@ static inline int adis_read_reg_16(struct adis *adis, unsigned int reg, int ret; ret = adis_read_reg(adis, reg, &tmp, 2); - *val = tmp; + if (ret == 0) + *val = tmp; return ret; } @@ -147,7 +148,8 @@ static inline int adis_read_reg_32(struct adis *adis, unsigned int reg, int ret; ret = adis_read_reg(adis, reg, &tmp, 4); - *val = tmp; + if (ret == 0) + *val = tmp; return ret; } -- cgit v1.2.3 From 9927c6fa3e1d941c9b89f807f5d0480390eb0471 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Oct 2019 06:03:32 -0700 Subject: debugfs: remove return value of debugfs_create_atomic_t() No one checks the return value of debugfs_create_atomic_t(), as it's not needed, so make the return value void, so that no one tries to do so in the future. Link: https://lore.kernel.org/r/20191016130332.GA28240@kroah.com Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c127c159d10a..19231e618c16 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -117,8 +117,8 @@ void debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent, u64 *value); void debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value); -struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, - struct dentry *parent, atomic_t *value); +void debugfs_create_atomic_t(const char *name, umode_t mode, + struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value); @@ -284,11 +284,10 @@ static inline void debugfs_create_size_t(const char *name, umode_t mode, struct dentry *parent, size_t *value) { } -static inline struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, - struct dentry *parent, atomic_t *value) -{ - return ERR_PTR(-ENODEV); -} +static inline void debugfs_create_atomic_t(const char *name, umode_t mode, + struct dentry *parent, + atomic_t *value) +{ } static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, -- cgit v1.2.3 From 11f70002213774ed233950f71ea8803fa3700aa3 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 29 Sep 2019 23:18:49 +0900 Subject: leds: remove PAGE_SIZE limit of /sys/class/leds//trigger Reading /sys/class/leds//trigger returns all available LED triggers. However, the size of this file is limited to PAGE_SIZE because of the limitation for sysfs attribute. Enabling LED CPU trigger on systems with thousands of CPUs easily hits PAGE_SIZE limit, and makes it impossible to see all available LED triggers and which trigger is currently activated. We work around it here by converting /sys/class/leds//trigger to binary attribute, which is not limited by length. This is _not_ good design, do not copy it. Signed-off-by: Akinobu Mita Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Dan Murphy A Reviewed-by: Greg Kroah-Hartman Signed-off-by: Pavel Machek --- include/linux/leds.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index b8df71193329..0b6b6166e9ea 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -361,11 +361,6 @@ struct led_trigger { #define led_trigger_get_led(dev) ((struct led_classdev *)dev_get_drvdata((dev))) #define led_trigger_get_drvdata(dev) (led_get_trigger_data(led_trigger_get_led(dev))) -ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count); -ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr, - char *buf); - /* Registration functions for complex triggers */ extern int led_trigger_register(struct led_trigger *trigger); extern void led_trigger_unregister(struct led_trigger *trigger); -- cgit v1.2.3 From 4a29f90e60df955f1b6e0dd836955f14a62dc103 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Tue, 1 Oct 2019 13:04:36 -0500 Subject: leds: flash: Convert non extended registration to inline Convert the #define non-extended registration API to an inline function. Signed-off-by: Dan Murphy Signed-off-by: Pavel Machek --- include/linux/led-class-flash.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 1e824963af17..7ff287a9e2a2 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -98,8 +98,11 @@ extern int led_classdev_flash_register_ext(struct device *parent, struct led_classdev_flash *fled_cdev, struct led_init_data *init_data); -#define led_classdev_flash_register(parent, fled_cdev) \ - led_classdev_flash_register_ext(parent, fled_cdev, NULL) +static inline int led_classdev_flash_register(struct device *parent, + struct led_classdev_flash *fled_cdev) +{ + return led_classdev_flash_register_ext(parent, fled_cdev, NULL); +} /** * led_classdev_flash_unregister - unregisters an object of led_classdev class -- cgit v1.2.3 From 57e5c31e53758aad96699e784a752ad944890b25 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 2 Oct 2019 07:40:37 -0500 Subject: leds: flash: Remove extern from the header file extern is implied and is not needed in the header file. Remove the extern keyword and re-align the code. Signed-off-by: Dan Murphy Signed-off-by: Pavel Machek --- include/linux/led-class-flash.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 7ff287a9e2a2..1bd83159fa4c 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -94,12 +94,12 @@ static inline struct led_classdev_flash *lcdev_to_flcdev( * * Returns: 0 on success or negative error value on failure */ -extern int led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data); +int led_classdev_flash_register_ext(struct device *parent, + struct led_classdev_flash *fled_cdev, + struct led_init_data *init_data); static inline int led_classdev_flash_register(struct device *parent, - struct led_classdev_flash *fled_cdev) + struct led_classdev_flash *fled_cdev) { return led_classdev_flash_register_ext(parent, fled_cdev, NULL); } @@ -111,7 +111,7 @@ static inline int led_classdev_flash_register(struct device *parent, * * Unregister a previously registered via led_classdev_flash_register object */ -extern void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev); +void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev); /** * led_set_flash_strobe - setup flash strobe @@ -159,8 +159,8 @@ static inline int led_get_flash_strobe(struct led_classdev_flash *fled_cdev, * * Returns: 0 on success or negative error value on failure */ -extern int led_set_flash_brightness(struct led_classdev_flash *fled_cdev, - u32 brightness); +int led_set_flash_brightness(struct led_classdev_flash *fled_cdev, + u32 brightness); /** * led_update_flash_brightness - update flash LED brightness @@ -171,7 +171,7 @@ extern int led_set_flash_brightness(struct led_classdev_flash *fled_cdev, * * Returns: 0 on success or negative error value on failure */ -extern int led_update_flash_brightness(struct led_classdev_flash *fled_cdev); +int led_update_flash_brightness(struct led_classdev_flash *fled_cdev); /** * led_set_flash_timeout - set flash LED timeout @@ -182,8 +182,7 @@ extern int led_update_flash_brightness(struct led_classdev_flash *fled_cdev); * * Returns: 0 on success or negative error value on failure */ -extern int led_set_flash_timeout(struct led_classdev_flash *fled_cdev, - u32 timeout); +int led_set_flash_timeout(struct led_classdev_flash *fled_cdev, u32 timeout); /** * led_get_flash_fault - get the flash LED fault @@ -194,7 +193,6 @@ extern int led_set_flash_timeout(struct led_classdev_flash *fled_cdev, * * Returns: 0 on success or negative error value on failure */ -extern int led_get_flash_fault(struct led_classdev_flash *fled_cdev, - u32 *fault); +int led_get_flash_fault(struct led_classdev_flash *fled_cdev, u32 *fault); #endif /* __LINUX_FLASH_LEDS_H_INCLUDED */ -- cgit v1.2.3 From 20cdba9d9c165e475fcc5af97857b6fa7aec96a0 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 2 Oct 2019 07:40:38 -0500 Subject: leds: flash: Add devm_* functions to the flash class Add the missing device managed API for registration and unregistration for the LED flash class. Signed-off-by: Dan Murphy Signed-off-by: Pavel Machek --- include/linux/led-class-flash.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 1bd83159fa4c..21a3358a1731 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -113,6 +113,20 @@ static inline int led_classdev_flash_register(struct device *parent, */ void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev); +int devm_led_classdev_flash_register_ext(struct device *parent, + struct led_classdev_flash *fled_cdev, + struct led_init_data *init_data); + + +static inline int devm_led_classdev_flash_register(struct device *parent, + struct led_classdev_flash *fled_cdev) +{ + return devm_led_classdev_flash_register_ext(parent, fled_cdev, NULL); +} + +void devm_led_classdev_flash_unregister(struct device *parent, + struct led_classdev_flash *fled_cdev); + /** * led_set_flash_strobe - setup flash strobe * @fled_cdev: the flash LED to set strobe on -- cgit v1.2.3 From ec28a8cfdce6306afcbf528e231a733010c82251 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 2 Oct 2019 07:40:40 -0500 Subject: leds: core: Remove extern from header extern is implied and is not needed in the header file. Remove the extern keyword and re-align the code. Signed-off-by: Dan Murphy Signed-off-by: Pavel Machek --- include/linux/leds.h | 95 ++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 0b6b6166e9ea..52e50183b963 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -161,7 +161,7 @@ struct led_classdev { * * Returns: 0 on success or negative error value on failure */ -extern int led_classdev_register_ext(struct device *parent, +int led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data); @@ -181,7 +181,7 @@ static inline int led_classdev_register(struct device *parent, return led_classdev_register_ext(parent, led_cdev, NULL); } -extern int devm_led_classdev_register_ext(struct device *parent, +int devm_led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data); @@ -190,11 +190,11 @@ static inline int devm_led_classdev_register(struct device *parent, { return devm_led_classdev_register_ext(parent, led_cdev, NULL); } -extern void led_classdev_unregister(struct led_classdev *led_cdev); -extern void devm_led_classdev_unregister(struct device *parent, - struct led_classdev *led_cdev); -extern void led_classdev_suspend(struct led_classdev *led_cdev); -extern void led_classdev_resume(struct led_classdev *led_cdev); +void led_classdev_unregister(struct led_classdev *led_cdev); +void devm_led_classdev_unregister(struct device *parent, + struct led_classdev *led_cdev); +void led_classdev_suspend(struct led_classdev *led_cdev); +void led_classdev_resume(struct led_classdev *led_cdev); /** * led_blink_set - set blinking with software fallback @@ -211,9 +211,8 @@ extern void led_classdev_resume(struct led_classdev *led_cdev); * led_cdev->brightness_set() will not stop the blinking, * use led_classdev_brightness_set() instead. */ -extern void led_blink_set(struct led_classdev *led_cdev, - unsigned long *delay_on, - unsigned long *delay_off); +void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, + unsigned long *delay_off); /** * led_blink_set_oneshot - do a oneshot software blink * @led_cdev: the LED to start blinking @@ -228,10 +227,9 @@ extern void led_blink_set(struct led_classdev *led_cdev, * If invert is set, led blinks for delay_off first, then for * delay_on and leave the led on after the on-off cycle. */ -extern void led_blink_set_oneshot(struct led_classdev *led_cdev, - unsigned long *delay_on, - unsigned long *delay_off, - int invert); +void led_blink_set_oneshot(struct led_classdev *led_cdev, + unsigned long *delay_on, unsigned long *delay_off, + int invert); /** * led_set_brightness - set LED brightness * @led_cdev: the LED to set @@ -241,8 +239,8 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, * software blink timer that implements blinking when the * hardware doesn't. This function is guaranteed not to sleep. */ -extern void led_set_brightness(struct led_classdev *led_cdev, - enum led_brightness brightness); +void led_set_brightness(struct led_classdev *led_cdev, + enum led_brightness brightness); /** * led_set_brightness_sync - set LED brightness synchronously @@ -255,8 +253,8 @@ extern void led_set_brightness(struct led_classdev *led_cdev, * * Returns: 0 on success or negative error value on failure */ -extern int led_set_brightness_sync(struct led_classdev *led_cdev, - enum led_brightness value); +int led_set_brightness_sync(struct led_classdev *led_cdev, + enum led_brightness value); /** * led_update_brightness - update LED brightness @@ -267,7 +265,7 @@ extern int led_set_brightness_sync(struct led_classdev *led_cdev, * * Returns: 0 on success or negative error value on failure */ -extern int led_update_brightness(struct led_classdev *led_cdev); +int led_update_brightness(struct led_classdev *led_cdev); /** * led_get_default_pattern - return default pattern @@ -279,8 +277,7 @@ extern int led_update_brightness(struct led_classdev *led_cdev); * Return: Allocated array of integers with default pattern from device tree * or NULL. Caller is responsible for kfree(). */ -extern u32 *led_get_default_pattern(struct led_classdev *led_cdev, - unsigned int *size); +u32 *led_get_default_pattern(struct led_classdev *led_cdev, unsigned int *size); /** * led_sysfs_disable - disable LED sysfs interface @@ -288,7 +285,7 @@ extern u32 *led_get_default_pattern(struct led_classdev *led_cdev, * * Disable the led_cdev's sysfs interface. */ -extern void led_sysfs_disable(struct led_classdev *led_cdev); +void led_sysfs_disable(struct led_classdev *led_cdev); /** * led_sysfs_enable - enable LED sysfs interface @@ -296,7 +293,7 @@ extern void led_sysfs_disable(struct led_classdev *led_cdev); * * Enable the led_cdev's sysfs interface. */ -extern void led_sysfs_enable(struct led_classdev *led_cdev); +void led_sysfs_enable(struct led_classdev *led_cdev); /** * led_compose_name - compose LED class device name @@ -311,8 +308,8 @@ extern void led_sysfs_enable(struct led_classdev *led_cdev); * * Returns: 0 on success or negative error value on failure */ -extern int led_compose_name(struct device *dev, struct led_init_data *init_data, - char *led_classdev_name); +int led_compose_name(struct device *dev, struct led_init_data *init_data, + char *led_classdev_name); /** * led_sysfs_is_disabled - check if LED sysfs interface is disabled @@ -362,27 +359,24 @@ struct led_trigger { #define led_trigger_get_drvdata(dev) (led_get_trigger_data(led_trigger_get_led(dev))) /* Registration functions for complex triggers */ -extern int led_trigger_register(struct led_trigger *trigger); -extern void led_trigger_unregister(struct led_trigger *trigger); -extern int devm_led_trigger_register(struct device *dev, +int led_trigger_register(struct led_trigger *trigger); +void led_trigger_unregister(struct led_trigger *trigger); +int devm_led_trigger_register(struct device *dev, struct led_trigger *trigger); -extern void led_trigger_register_simple(const char *name, +void led_trigger_register_simple(const char *name, struct led_trigger **trigger); -extern void led_trigger_unregister_simple(struct led_trigger *trigger); -extern void led_trigger_event(struct led_trigger *trigger, - enum led_brightness event); -extern void led_trigger_blink(struct led_trigger *trigger, - unsigned long *delay_on, - unsigned long *delay_off); -extern void led_trigger_blink_oneshot(struct led_trigger *trigger, - unsigned long *delay_on, - unsigned long *delay_off, - int invert); -extern void led_trigger_set_default(struct led_classdev *led_cdev); -extern int led_trigger_set(struct led_classdev *led_cdev, - struct led_trigger *trigger); -extern void led_trigger_remove(struct led_classdev *led_cdev); +void led_trigger_unregister_simple(struct led_trigger *trigger); +void led_trigger_event(struct led_trigger *trigger, enum led_brightness event); +void led_trigger_blink(struct led_trigger *trigger, unsigned long *delay_on, + unsigned long *delay_off); +void led_trigger_blink_oneshot(struct led_trigger *trigger, + unsigned long *delay_on, + unsigned long *delay_off, + int invert); +void led_trigger_set_default(struct led_classdev *led_cdev); +int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger); +void led_trigger_remove(struct led_classdev *led_cdev); static inline void led_set_trigger_data(struct led_classdev *led_cdev, void *trigger_data) @@ -410,8 +404,7 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) * This is meant to be used on triggers with statically * allocated name. */ -extern void led_trigger_rename_static(const char *name, - struct led_trigger *trig); +void led_trigger_rename_static(const char *name, struct led_trigger *trig); #define module_led_trigger(__led_trigger) \ module_driver(__led_trigger, led_trigger_register, \ @@ -453,20 +446,20 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) /* Trigger specific functions */ #ifdef CONFIG_LEDS_TRIGGER_DISK -extern void ledtrig_disk_activity(bool write); +void ledtrig_disk_activity(bool write); #else static inline void ledtrig_disk_activity(bool write) {} #endif #ifdef CONFIG_LEDS_TRIGGER_MTD -extern void ledtrig_mtd_activity(void); +void ledtrig_mtd_activity(void); #else static inline void ledtrig_mtd_activity(void) {} #endif #if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE) -extern void ledtrig_flash_ctrl(bool on); -extern void ledtrig_torch_ctrl(bool on); +void ledtrig_flash_ctrl(bool on); +void ledtrig_torch_ctrl(bool on); #else static inline void ledtrig_flash_ctrl(bool on) {} static inline void ledtrig_torch_ctrl(bool on) {} @@ -546,7 +539,7 @@ enum cpu_led_event { CPU_LED_HALTED, /* Machine shutdown */ }; #ifdef CONFIG_LEDS_TRIGGER_CPU -extern void ledtrig_cpu(enum cpu_led_event evt); +void ledtrig_cpu(enum cpu_led_event evt); #else static inline void ledtrig_cpu(enum cpu_led_event evt) { @@ -555,7 +548,7 @@ static inline void ledtrig_cpu(enum cpu_led_event evt) #endif #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED -extern void led_classdev_notify_brightness_hw_changed( +void led_classdev_notify_brightness_hw_changed( struct led_classdev *led_cdev, enum led_brightness brightness); #else static inline void led_classdev_notify_brightness_hw_changed( -- cgit v1.2.3 From 9cc93be7b0c91a87ef452457c706af62741249d7 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 2 Oct 2019 07:40:42 -0500 Subject: leds: core: Fix leds.h structure documentation Update the leds.h structure documentation to define the correct arguments. Signed-off-by: Dan Murphy Signed-off-by: Pavel Machek --- include/linux/leds.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 52e50183b963..242258f7d837 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -245,7 +245,7 @@ void led_set_brightness(struct led_classdev *led_cdev, /** * led_set_brightness_sync - set LED brightness synchronously * @led_cdev: the LED to set - * @brightness: the brightness to set it to + * @value: the brightness to set it to * * Set an LED's brightness immediately. This function will block * the caller for the time required for accessing device registers, @@ -298,8 +298,7 @@ void led_sysfs_enable(struct led_classdev *led_cdev); /** * led_compose_name - compose LED class device name * @dev: LED controller device object - * @child: child fwnode_handle describing a LED or a group of synchronized LEDs; - * it must be provided only for fwnode based LEDs + * @init_data: the LED class device initialization data * @led_classdev_name: composed LED class device name * * Create LED class device name basing on the provided init_data argument. -- cgit v1.2.3 From d3504757f3f049b553ba0eda8bd17cd1f2651285 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Oct 2019 11:41:24 +0200 Subject: debugfs: Add debugfs_create_xul() for hexadecimal unsigned long The existing debugfs_create_ulong() function supports objects of type "unsigned long", which are 32-bit or 64-bit depending on the platform, in decimal form. To format objects in hexadecimal, various debugfs_create_x*() functions exist, but all of them take fixed-size types. Add a debugfs helper for "unsigned long" objects in hexadecimal format. This avoids the need for users to open-code the same, or introduce bugs when casting the value pointer to "u32 *" or "u64 *" to call debugfs_create_x{32,64}(). Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20191025094130.26033-2-geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 19231e618c16..0e8f2e0cb91f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -351,4 +351,25 @@ static inline ssize_t debugfs_write_file_bool(struct file *file, #endif +/** + * debugfs_create_xul - create a debugfs file that is used to read and write an + * unsigned long value, formatted in hexadecimal + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + */ +static inline void debugfs_create_xul(const char *name, umode_t mode, + struct dentry *parent, + unsigned long *value) +{ + if (sizeof(*value) == sizeof(u32)) + debugfs_create_x32(name, mode, parent, (u32 *)value); + else + debugfs_create_x64(name, mode, parent, (u64 *)value); +} + #endif -- cgit v1.2.3 From e86d5a02874c1364c50e1b532481835b54173ed2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Oct 2019 16:38:56 -0400 Subject: NFS: Convert struct nfs_fattr to use struct timespec64 NFSv4 supports 64-bit times, so we should switch to using struct timespec64 when decoding attributes. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a87fe854f008..47266870a235 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -171,7 +171,7 @@ struct nfs_server { struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ - struct timespec time_delta; /* smallest time granularity */ + struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b8324ec08f3..db5c01001937 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -62,14 +62,14 @@ struct nfs_fattr { struct nfs_fsid fsid; __u64 fileid; __u64 mounted_on_fileid; - struct timespec atime; - struct timespec mtime; - struct timespec ctime; + struct timespec64 atime; + struct timespec64 mtime; + struct timespec64 ctime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ - struct timespec pre_mtime; /* pre_op_attr.mtime */ - struct timespec pre_ctime; /* pre_op_attr.ctime */ + struct timespec64 pre_mtime; /* pre_op_attr.mtime */ + struct timespec64 pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; struct nfs4_string *owner_name; @@ -143,7 +143,7 @@ struct nfs_fsinfo { __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; - struct timespec time_delta; /* server time granularity */ + struct timespec64 time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ __u32 nlayouttypes; /* number of layouttypes */ __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ -- cgit v1.2.3 From 6430b323ae09f146dfc26e6d17c432bfc3d11452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Oct 2019 17:00:02 -0400 Subject: NFSv3: Clean up timespec encode Simplify the struct iattr timestamp encoding by skipping the step of an intermediate struct timespec. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index db5c01001937..22bc6613474e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -869,7 +869,7 @@ struct nfs3_sattrargs { struct nfs_fh * fh; struct iattr * sattr; unsigned int guard; - struct timespec guardtime; + struct timespec64 guardtime; }; struct nfs3_diropargs { -- cgit v1.2.3 From 4b1b69cedf9de8c203101ea74510c07d428538f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Oct 2019 14:08:43 -0400 Subject: NFS: Add a flag to tell nfs_client to set RPC_CLNT_CREATE_NOPING Add a flag to tell the nfs_client it should set RPC_CLNT_CREATE_NOPING when creating the rpc client. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 47266870a235..a50dd432475b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -45,6 +45,7 @@ struct nfs_client { #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ +#define NFS_CS_NOPING 6 /* - don't ping on connect */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From 52f98f1a2ddd2bb561f2c7e3b19a81d816a63118 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 09:49:45 -0400 Subject: NFS/pnfs: Separate NFSv3 DS and MDS traffic If a NFSv3 server is being used as both a DS and as a regular NFSv3 server, we may want to keep the IO traffic on a separate TCP connection, since it will typically have very different timeout characteristics. This patch therefore sets up a flag to separate the two modes of operation for the nfs_client. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50dd432475b..69e80cef5a81 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -46,6 +46,7 @@ struct nfs_client { #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ #define NFS_CS_NOPING 6 /* - don't ping on connect */ +#define NFS_CS_DS 7 /* - Server is a DS */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From e6237b6feb37582fbd6bd7a8336d1256a6b4b4f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 11:13:54 -0400 Subject: NFSv4.1: Don't rebind to the same source port when reconnecting to the server NFSv2, v3 and NFSv4 servers often have duplicate replay caches that look at the source port when deciding whether or not an RPC call is a replay of a previous call. This requires clients to perform strange TCP gymnastics in order to ensure that when they reconnect to the server, they bind to the same source port. NFSv4.1 and NFSv4.2 have sessions that provide proper replay semantics, that do not look at the source port of the connection. This patch therefore ensures they can ignore the rebind requirement. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 69e80cef5a81..df61ff8981e8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -47,6 +47,7 @@ struct nfs_client { #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ +#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index abc63bd1be2b..ec52e78d432b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -149,6 +149,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) +#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d783e15ba898..ccd35cf4fc41 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -207,7 +207,8 @@ struct rpc_xprt { unsigned int min_reqs; /* min number of slots */ unsigned int num_reqs; /* total slots */ unsigned long state; /* transport state */ - unsigned char resvport : 1; /* use a reserved port */ + unsigned char resvport : 1, /* use a reserved port */ + reuseport : 1; /* reuse port on reconnect */ atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ -- cgit v1.2.3 From c7d3d28360fdb3ed3a5aa0bab19315e0fdc994a1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Nov 2019 17:45:31 +0100 Subject: quota: Factor out setup of quota inode Factor out setting up of quota inode and eventual error cleanup from vfs_load_quota_inode(). This will simplify situation for filesystems that don't have any quota inodes. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 185d94829701..2625766bcfe7 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -89,6 +89,8 @@ int dquot_file_open(struct inode *inode, struct file *file); int dquot_enable(struct inode *inode, int type, int format_id, unsigned int flags); +int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, + unsigned int flags); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); int dquot_quota_on_mount(struct super_block *sb, char *qf_name, -- cgit v1.2.3 From dc19432ae1c22d696f91edea11ae06c348b4e88a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Nov 2019 18:37:44 +0100 Subject: quota: Rename vfs_load_quota_inode() to dquot_load_quota_inode() Rename vfs_load_quota_inode() to dquot_load_quota_inode() to be consistent with naming of other functions used for enabling quota accounting from filesystems. Also export the function and add some sanity checks to assure filesystems are calling the function properly. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 2625766bcfe7..0ce9da5a1a93 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -91,6 +91,8 @@ int dquot_enable(struct inode *inode, int type, int format_id, unsigned int flags); int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags); +int dquot_load_quota_inode(struct inode *inode, int type, int format_id, + unsigned int flags); int dquot_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); int dquot_quota_on_mount(struct super_block *sb, char *qf_name, -- cgit v1.2.3 From 069a9166369773627e51c5249cd7f9169aecd7fa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 1 Nov 2019 18:57:56 +0100 Subject: quota: Drop dquot_enable() Now dquot_enable() has only two internal callers and both of them just need to update quota flags and don't need most of checks. Just drop dquot_enable() and fold necessary functionality into the two calling places. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 0ce9da5a1a93..6b8ebc8d715e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -87,8 +87,6 @@ int dquot_mark_dquot_dirty(struct dquot *dquot); int dquot_file_open(struct inode *inode, struct file *file); -int dquot_enable(struct inode *inode, int type, int format_id, - unsigned int flags); int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags); int dquot_load_quota_inode(struct inode *inode, int type, int format_id, -- cgit v1.2.3 From c6919d5e0cd168a732034d8dc19fdc3dff683a2b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 8 Oct 2019 15:25:59 +0300 Subject: usb: roles: Add usb_role_switch_find_by_fwnode() Simple wrapper function that searches USB role switches with class_find_device_by_fwnode(). Signed-off-by: Heikki Krogerus Reviewed-by: Hans de Goede Tested-by: Hans de Goede Link: https://lore.kernel.org/r/20191008122600.22340-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/role.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 2d77f97df72d..efac3af83d6b 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -50,6 +50,9 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev); struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *node); void usb_role_switch_put(struct usb_role_switch *sw); +struct usb_role_switch * +usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode); + struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc); -- cgit v1.2.3 From 51f421c85c880dcb37df11e672b384eaa4444328 Mon Sep 17 00:00:00 2001 From: Revanth Rajashekar Date: Thu, 31 Oct 2019 10:13:21 -0600 Subject: block: sed-opal: Add support to read/write opal tables generically This feature gives the user RW access to any opal table with admin1 authority. The flags described in the new structure determines if the user wants to read/write the data. Flags are checked for valid values in order to allow future features to be added to the ioctl. The user can provide the desired table's UID. Also, the ioctl provides a size and offset field and internally will loop data accesses to return the full data block. Read overrun is prevented by the initiator's sec_send_recv() backend. The ioctl provides a private field with the intention to accommodate any future expansions to the ioctl. Reviewed-by: Scott Bauer Reviewed-by: Jon Derrick Signed-off-by: Revanth Rajashekar Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 53c28d750a45..1ac0d712a9c3 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_PSID_REVERT_TPR: case IOC_OPAL_MBR_DONE: case IOC_OPAL_WRITE_SHADOW_MBR: + case IOC_OPAL_GENERIC_TABLE_RW: return true; } return false; -- cgit v1.2.3 From 7162431dcf72032835d369c8d7b51311df407938 Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Wed, 16 Oct 2019 13:33:13 +0200 Subject: ftrace: Introduce PERMANENT ftrace_ops flag Livepatch uses ftrace for redirection to new patched functions. It means that if ftrace is disabled, all live patched functions are disabled as well. Toggling global 'ftrace_enabled' sysctl thus affect it directly. It is not a problem per se, because only administrator can set sysctl values, but it still may be surprising. Introduce PERMANENT ftrace_ops flag to amend this. If the FTRACE_OPS_FL_PERMANENT is set on any ftrace ops, the tracing cannot be disabled by disabling ftrace_enabled. Equally, a callback with the flag set cannot be registered if ftrace_enabled is disabled. Link: http://lkml.kernel.org/r/20191016113316.13415-2-mbenes@suse.cz Reviewed-by: Petr Mladek Reviewed-by: Kamalesh Babulal Signed-off-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8a8cb3c401b2..8385cafe4f9f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -142,6 +142,8 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * PID - Is affected by set_ftrace_pid (allows filtering on those pids) * RCU - Set when the ops can only be called when RCU is watching. * TRACE_ARRAY - The ops->private points to a trace_array descriptor. + * PERMANENT - Set when the ops is permanent and should not be affected by + * ftrace_enabled. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -160,6 +162,7 @@ enum { FTRACE_OPS_FL_PID = 1 << 13, FTRACE_OPS_FL_RCU = 1 << 14, FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, + FTRACE_OPS_FL_PERMANENT = 1 << 16, }; #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 8c127a42af89c39560a8c5bd5accadaaa5741f8c Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 4 Nov 2019 17:24:19 +0300 Subject: usb: typec: Introduce typec_get_drvdata() Leaving the private driver_data pointer of the port device to the port drivers. Signed-off-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191104142435.29960-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 7df4ecabc78a..8b90cd77331c 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -179,6 +179,7 @@ struct typec_partner_desc { * @sw: Cable plug orientation switch * @mux: Multiplexer switch for Alternate/Accessory Modes * @fwnode: Optional fwnode of the port + * @driver_data: Private pointer for driver specific info * @try_role: Set data role preference for DRP port * @dr_set: Set Data Role * @pr_set: Set Power Role @@ -198,6 +199,7 @@ struct typec_capability { struct typec_switch *sw; struct typec_mux *mux; struct fwnode_handle *fwnode; + void *driver_data; int (*try_role)(const struct typec_capability *, int role); @@ -241,6 +243,8 @@ int typec_set_orientation(struct typec_port *port, enum typec_orientation typec_get_orientation(struct typec_port *port); int typec_set_mode(struct typec_port *port, int mode); +void *typec_get_drvdata(struct typec_port *port); + int typec_find_port_power_role(const char *name); int typec_find_power_role(const char *name); int typec_find_port_data_role(const char *name); -- cgit v1.2.3 From 46310e4dade2bc3b574d540e421e3aa9f32cfd5f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 4 Nov 2019 17:24:20 +0300 Subject: usb: typec: Separate the operations vector Introducing struct typec_operations which has the same callbacks as struct typec_capability. The old callbacks are kept for now, but after all users have been converted, they will be removed. Signed-off-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191104142435.29960-4-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 8b90cd77331c..c9bef128453b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -168,6 +168,23 @@ struct typec_partner_desc { struct usb_pd_identity *identity; }; +/** + * struct typec_operations - USB Type-C Port Operations + * @try_role: Set data role preference for DRP port + * @dr_set: Set Data Role + * @pr_set: Set Power Role + * @vconn_set: Source VCONN + * @port_type_set: Set port type + */ +struct typec_operations { + int (*try_role)(struct typec_port *port, int role); + int (*dr_set)(struct typec_port *port, enum typec_data_role role); + int (*pr_set)(struct typec_port *port, enum typec_role role); + int (*vconn_set)(struct typec_port *port, enum typec_role role); + int (*port_type_set)(struct typec_port *port, + enum typec_port_type type); +}; + /* * struct typec_capability - USB Type-C Port Capabilities * @type: Supported power role of the port @@ -180,6 +197,7 @@ struct typec_partner_desc { * @mux: Multiplexer switch for Alternate/Accessory Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info + * @ops: Port operations vector * @try_role: Set data role preference for DRP port * @dr_set: Set Data Role * @pr_set: Set Power Role @@ -201,6 +219,8 @@ struct typec_capability { struct fwnode_handle *fwnode; void *driver_data; + const struct typec_operations *ops; + int (*try_role)(const struct typec_capability *, int role); -- cgit v1.2.3 From 8c038ea8b65fc803cd35423b8a1ff7057dd52f8b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 4 Nov 2019 17:24:25 +0300 Subject: usb: typec: Remove the callback members from struct typec_capability There are no more users for them. Signed-off-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191104142435.29960-9-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index c9bef128453b..894798084319 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -198,11 +198,6 @@ struct typec_operations { * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info * @ops: Port operations vector - * @try_role: Set data role preference for DRP port - * @dr_set: Set Data Role - * @pr_set: Set Power Role - * @vconn_set: Set VCONN Role - * @port_type_set: Set port type * * Static capabilities of a single USB Type-C port. */ @@ -220,18 +215,6 @@ struct typec_capability { void *driver_data; const struct typec_operations *ops; - - int (*try_role)(const struct typec_capability *, - int role); - - int (*dr_set)(const struct typec_capability *, - enum typec_data_role); - int (*pr_set)(const struct typec_capability *, - enum typec_role); - int (*vconn_set)(const struct typec_capability *, - enum typec_role); - int (*port_type_set)(const struct typec_capability *, - enum typec_port_type); }; /* Specific to try_role(). Indicates the user want's to clear the preference. */ -- cgit v1.2.3 From 61d78ee29a0bf1078ebf5bbb2ccab7e7998d7410 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 4 Nov 2019 17:24:26 +0300 Subject: usb: typec: Remove unused members from struct typec_capability The members for the muxes are not used, so dropping them. Signed-off-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20191104142435.29960-10-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 894798084319..0f52723a11bd 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -209,8 +209,6 @@ struct typec_capability { int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; - struct typec_switch *sw; - struct typec_mux *mux; struct fwnode_handle *fwnode; void *driver_data; -- cgit v1.2.3 From 5eb0e0e4f90addc6b79ebf1cb4b06b56b09f09de Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Fri, 20 Sep 2019 13:34:28 +0530 Subject: firmware/qcom_scm: Add scm call to handle smmu errata Qcom's smmu-500 needs to toggle wait-for-safe sequence to handle TLB invalidation sync's. Few firmwares allow doing that through SCM interface. Add API to toggle wait for safe from firmware through a SCM call. Signed-off-by: Vivek Gautam Reviewed-by: Bjorn Andersson Reviewed-by: Stephen Boyd Acked-by: Andy Gross Signed-off-by: Sai Prakash Ranjan Signed-off-by: Will Deacon --- include/linux/qcom_scm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 2d5eff506e13..ffd72b3b14ee 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -58,6 +58,7 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); #else @@ -97,6 +98,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } +static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return -ENODEV; } static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return -ENODEV; } static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { return -ENODEV; } #endif -- cgit v1.2.3 From f16583614222d015968541f2e50447c67c277f74 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 27 Sep 2019 14:51:34 -0700 Subject: nvme-fc: Sync nvme-fc header to FC-NVME-2 Sync the header to FC-NVME-2 r1.06 (T11-2019-00210-v001). Includes some minor mods where pre-release field names changed by the time the spec was released. Signed-off-by: James Smart Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme-fc.h | 182 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 137 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 067c9fea64fe..e8c30b39bb27 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -4,33 +4,60 @@ */ /* - * This file contains definitions relative to FC-NVME r1.14 (16-020vB). - * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content. + * This file contains definitions relative to FC-NVME-2 r1.06 + * (T11-2019-00210-v001). */ #ifndef _NVME_FC_H #define _NVME_FC_H 1 +#include -#define NVME_CMD_SCSI_ID 0xFD +#define NVME_CMD_FORMAT_ID 0xFD #define NVME_CMD_FC_ID FC_TYPE_NVME /* FC-NVME Cmd IU Flags */ -#define FCNVME_CMD_FLAGS_DIRMASK 0x03 -#define FCNVME_CMD_FLAGS_WRITE 0x01 -#define FCNVME_CMD_FLAGS_READ 0x02 +enum { + FCNVME_CMD_FLAGS_DIRMASK = 0x03, + FCNVME_CMD_FLAGS_WRITE = (1 << 0), + FCNVME_CMD_FLAGS_READ = (1 << 1), + + FCNVME_CMD_FLAGS_PICWP = (1 << 2), +}; + +enum { + FCNVME_CMD_CAT_MASK = 0x0F, + FCNVME_CMD_CAT_ADMINQ = 0x01, + FCNVME_CMD_CAT_CSSMASK = 0x07, + FCNVME_CMD_CAT_CSSFLAG = 0x08, +}; + +static inline __u8 fccmnd_set_cat_admin(__u8 rsv_cat) +{ + return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_ADMINQ; +} + +static inline __u8 fccmnd_set_cat_css(__u8 rsv_cat, __u8 css) +{ + return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_CSSFLAG | + (css & FCNVME_CMD_CAT_CSSMASK); +} struct nvme_fc_cmd_iu { - __u8 scsi_id; + __u8 format_id; __u8 fc_id; __be16 iu_len; - __u8 rsvd4[3]; + __u8 rsvd4[2]; + __u8 rsv_cat; __u8 flags; __be64 connection_id; __be32 csn; __be32 data_len; struct nvme_command sqe; - __be32 rsvd88[2]; + __u8 dps; + __u8 lbads; + __be16 ms; + __be32 rsvd92; }; #define NVME_FC_SIZEOF_ZEROS_RSP 12 @@ -38,11 +65,12 @@ struct nvme_fc_cmd_iu { enum { FCNVME_SC_SUCCESS = 0, FCNVME_SC_INVALID_FIELD = 1, - FCNVME_SC_INVALID_CONNID = 2, + /* reserved 2 */ + FCNVME_SC_ILL_CONN_PARAMS = 3, }; struct nvme_fc_ersp_iu { - __u8 status_code; + __u8 ersp_result; __u8 rsvd1; __be16 iu_len; __be32 rsn; @@ -53,14 +81,44 @@ struct nvme_fc_ersp_iu { }; -/* FC-NVME Link Services */ +#define FCNVME_NVME_SR_OPCODE 0x01 + +struct nvme_fc_nvme_sr_iu { + __u8 fc_id; + __u8 opcode; + __u8 rsvd2; + __u8 retry_rctl; + __be32 rsvd4; +}; + + +enum { + FCNVME_SRSTAT_ACC = 0x0, + FCNVME_SRSTAT_INV_FCID = 0x1, + /* reserved 0x2 */ + FCNVME_SRSTAT_LOGICAL_ERR = 0x3, + FCNVME_SRSTAT_INV_QUALIF = 0x4, + FCNVME_SRSTAT_UNABL2PERFORM = 0x9, +}; + +struct nvme_fc_nvme_sr_rsp_iu { + __u8 fc_id; + __u8 opcode; + __u8 rsvd2; + __u8 status; + __be32 rsvd4; +}; + + +/* FC-NVME Link Services - LS cmd values (w0 bits 31:24) */ enum { FCNVME_LS_RSVD = 0, FCNVME_LS_RJT = 1, FCNVME_LS_ACC = 2, - FCNVME_LS_CREATE_ASSOCIATION = 3, - FCNVME_LS_CREATE_CONNECTION = 4, - FCNVME_LS_DISCONNECT = 5, + FCNVME_LS_CREATE_ASSOCIATION = 3, /* Create Association */ + FCNVME_LS_CREATE_CONNECTION = 4, /* Create I/O Connection */ + FCNVME_LS_DISCONNECT_ASSOC = 5, /* Disconnect Association */ + FCNVME_LS_DISCONNECT_CONN = 6, /* Disconnect Connection */ }; /* FC-NVME Link Service Descriptors */ @@ -117,14 +175,17 @@ enum fcnvme_ls_rjt_reason { FCNVME_RJT_RC_UNSUP = 0x0b, /* command not supported */ - FCNVME_RJT_RC_INPROG = 0x0e, - /* command already in progress */ - FCNVME_RJT_RC_INV_ASSOC = 0x40, - /* Invalid Association ID*/ + /* Invalid Association ID */ FCNVME_RJT_RC_INV_CONN = 0x41, - /* Invalid Connection ID*/ + /* Invalid Connection ID */ + + FCNVME_RJT_RC_INV_PARAM = 0x42, + /* Invalid Parameters */ + + FCNVME_RJT_RC_INSUF_RES = 0x43, + /* Insufficient Resources */ FCNVME_RJT_RC_VENDOR = 0xff, /* vendor specific error */ @@ -138,14 +199,32 @@ enum fcnvme_ls_rjt_explan { FCNVME_RJT_EXP_OXID_RXID = 0x17, /* invalid OX_ID-RX_ID combination */ - FCNVME_RJT_EXP_INSUF_RES = 0x29, - /* insufficient resources */ - FCNVME_RJT_EXP_UNAB_DATA = 0x2a, /* unable to supply requested data */ FCNVME_RJT_EXP_INV_LEN = 0x2d, /* Invalid payload length */ + + FCNVME_RJT_EXP_INV_ERSP_RAT = 0x40, + /* Invalid NVMe_ERSP Ratio */ + + FCNVME_RJT_EXP_INV_CTLR_ID = 0x41, + /* Invalid Controller ID */ + + FCNVME_RJT_EXP_INV_QUEUE_ID = 0x42, + /* Invalid Queue ID */ + + FCNVME_RJT_EXP_INV_SQSIZE = 0x43, + /* Invalid Submission Queue Size */ + + FCNVME_RJT_EXP_INV_HOSTID = 0x44, + /* Invalid HOST ID */ + + FCNVME_RJT_EXP_INV_HOSTNQN = 0x45, + /* Invalid HOSTNQN */ + + FCNVME_RJT_EXP_INV_SUBNQN = 0x46, + /* Invalid SUBNQN */ }; /* FCNVME_LSDESC_RJT */ @@ -209,21 +288,11 @@ struct fcnvme_lsdesc_cr_conn_cmd { __be32 rsvd52; }; -/* Disconnect Scope Values */ -enum { - FCNVME_DISCONN_ASSOCIATION = 0, - FCNVME_DISCONN_CONNECTION = 1, -}; - /* FCNVME_LSDESC_DISCONN_CMD */ struct fcnvme_lsdesc_disconn_cmd { __be32 desc_tag; /* FCNVME_LSDESC_xxx */ __be32 desc_len; - u8 rsvd8[3]; - /* note: scope is really a 1 bit field */ - u8 scope; /* FCNVME_DISCONN_xxx */ - __be32 rsvd12; - __be64 id; + __be32 rsvd8[4]; }; /* FCNVME_LSDESC_CONN_ID */ @@ -242,9 +311,14 @@ struct fcnvme_lsdesc_assoc_id { /* r_ctl values */ enum { - FCNVME_RS_RCTL_DATA = 1, - FCNVME_RS_RCTL_XFER_RDY = 5, - FCNVME_RS_RCTL_RSP = 8, + FCNVME_RS_RCTL_CMND = 0x6, + FCNVME_RS_RCTL_DATA = 0x1, + FCNVME_RS_RCTL_CONF = 0x3, + FCNVME_RS_RCTL_SR = 0x9, + FCNVME_RS_RCTL_XFER_RDY = 0x5, + FCNVME_RS_RCTL_RSP = 0x7, + FCNVME_RS_RCTL_ERSP = 0x8, + FCNVME_RS_RCTL_SR_RSP = 0xA, }; @@ -264,7 +338,10 @@ struct fcnvme_ls_acc_hdr { struct fcnvme_ls_rqst_w0 w0; __be32 desc_list_len; struct fcnvme_lsdesc_rqst rqst; - /* Followed by cmd-specific ACC descriptors, see next definitions */ + /* + * Followed by cmd-specific ACCEPT descriptors, see xxx_acc + * definitions below + */ }; /* FCNVME_LS_CREATE_ASSOCIATION */ @@ -302,25 +379,39 @@ struct fcnvme_ls_cr_conn_acc { struct fcnvme_lsdesc_conn_id connectid; }; -/* FCNVME_LS_DISCONNECT */ -struct fcnvme_ls_disconnect_rqst { +/* FCNVME_LS_DISCONNECT_ASSOC */ +struct fcnvme_ls_disconnect_assoc_rqst { struct fcnvme_ls_rqst_w0 w0; __be32 desc_list_len; struct fcnvme_lsdesc_assoc_id associd; struct fcnvme_lsdesc_disconn_cmd discon_cmd; }; -struct fcnvme_ls_disconnect_acc { +struct fcnvme_ls_disconnect_assoc_acc { + struct fcnvme_ls_acc_hdr hdr; +}; + + +/* FCNVME_LS_DISCONNECT_CONN */ +struct fcnvme_ls_disconnect_conn_rqst { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_assoc_id associd; + struct fcnvme_lsdesc_disconn_cmd connectid; +}; + +struct fcnvme_ls_disconnect_conn_acc { struct fcnvme_ls_acc_hdr hdr; }; /* - * Yet to be defined in FC-NVME: + * Default R_A_TOV is pulled in from fc_fs.h but needs conversion + * from ms to seconds for our use. */ -#define NVME_FC_CONNECT_TIMEOUT_SEC 2 /* 2 seconds */ -#define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */ -#define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */ +#define FC_TWO_TIMES_R_A_TOV (2 * (FC_DEF_R_A_TOV / 1000)) +#define NVME_FC_LS_TIMEOUT_SEC FC_TWO_TIMES_R_A_TOV +#define NVME_FC_TGTOP_TIMEOUT_SEC FC_TWO_TIMES_R_A_TOV /* * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>" @@ -328,6 +419,7 @@ struct fcnvme_ls_disconnect_acc { * infront of the <16hexdigits>. Without is considered the "min" string * and with is considered the "max" string. The hexdigits may be upper * or lower case. + * Note: FC-NVME-2 standard requires a "0x" prefix. */ #define NVME_FC_TRADDR_NNLEN 3 /* "?n-" */ #define NVME_FC_TRADDR_OXNNLEN 5 /* "?n-0x" */ -- cgit v1.2.3 From 2dc3947b53f573e8a75ea9cbec5588df88ca502e Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Oct 2019 19:57:35 +0300 Subject: nvme: introduce "Command Aborted By host" status code Fix the status code of canceled requests initiated by the host according to TP4028 (Status Code 0x371): "Command Aborted By host: The command was aborted as a result of host action (e.g., the host disconnected the Fabric connection)." Also in a multipath environment, unless otherwise specified, errors of this type (path related) should be retried using a different path, if one is available. Signed-off-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f61d6906e59d..a260cd754f28 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1368,6 +1368,7 @@ enum { NVME_SC_ANA_INACCESSIBLE = 0x302, NVME_SC_ANA_TRANSITION = 0x303, NVME_SC_HOST_PATH_ERROR = 0x370, + NVME_SC_HOST_ABORTED_CMD = 0x371, NVME_SC_CRD = 0x1800, NVME_SC_DNR = 0x4000, -- cgit v1.2.3 From 48c9e85b23464a7d1e3ebd70b79cc3a2d97d3222 Mon Sep 17 00:00:00 2001 From: Revanth Rajashekar Date: Mon, 14 Oct 2019 11:16:07 -0600 Subject: nvme: resync include/linux/nvme.h with nvmecli Update enumerations and structures in include/linux/nvme.h to resync with the nvmecli. All the updates are mentioned in the ratified NVMe 1.4 spec https://nvmexpress.org/wp-content/uploads/NVM-Express-1_4-2019.06.10-Ratified.pdf Reviewed-by: Christoph Hellwig Signed-off-by: Revanth Rajashekar Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a260cd754f28..3eca4f7d8510 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -107,8 +107,22 @@ enum { NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ - NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ + NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ + NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */ + NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */ + NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer + * Location + */ + NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ + NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ + NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ + NVME_REG_PMREBS = 0x0e0c, /* Persistent Memory Region Elasticity + * Buffer Size + */ + NVME_REG_PMRSWTP = 0x0e10, /* Persistent Memory Region Sustained + * Write Throughput + */ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ }; @@ -295,6 +309,14 @@ enum { NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, + NVME_CTRL_CTRATT_128_ID = 1 << 0, + NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1, + NVME_CTRL_CTRATT_NVM_SETS = 1 << 2, + NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3, + NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4, + NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, + NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, + NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, }; struct nvme_lbaf { @@ -352,6 +374,9 @@ enum { NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, NVME_ID_CNS_CTRL_LIST = 0x13, + NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15, + NVME_ID_CNS_NS_GRANULARITY = 0x16, + NVME_ID_CNS_UUID_LIST = 0x17, }; enum { @@ -409,7 +434,8 @@ struct nvme_smart_log { __u8 avail_spare; __u8 spare_thresh; __u8 percent_used; - __u8 rsvd6[26]; + __u8 endu_grp_crit_warn_sumry; + __u8 rsvd7[25]; __u8 data_units_read[16]; __u8 data_units_written[16]; __u8 host_reads[16]; @@ -423,7 +449,11 @@ struct nvme_smart_log { __le32 warning_temp_time; __le32 critical_comp_time; __le16 temp_sensor[8]; - __u8 rsvd216[296]; + __le32 thm_temp1_trans_count; + __le32 thm_temp2_trans_count; + __le32 thm_temp1_total_time; + __le32 thm_temp2_total_time; + __u8 rsvd232[280]; }; struct nvme_fw_slot_info_log { @@ -440,6 +470,7 @@ enum { NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, + NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, }; struct nvme_effects_log { @@ -563,6 +594,7 @@ enum nvme_opcode { nvme_cmd_compare = 0x05, nvme_cmd_write_zeroes = 0x08, nvme_cmd_dsm = 0x09, + nvme_cmd_verify = 0x0c, nvme_cmd_resv_register = 0x0d, nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, @@ -806,10 +838,14 @@ enum nvme_admin_opcode { nvme_admin_ns_mgmt = 0x0d, nvme_admin_activate_fw = 0x10, nvme_admin_download_fw = 0x11, + nvme_admin_dev_self_test = 0x14, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, nvme_admin_directive_send = 0x19, nvme_admin_directive_recv = 0x1a, + nvme_admin_virtual_mgmt = 0x1c, + nvme_admin_nvme_mi_send = 0x1d, + nvme_admin_nvme_mi_recv = 0x1e, nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, @@ -873,6 +909,7 @@ enum { NVME_FEAT_PLM_CONFIG = 0x13, NVME_FEAT_PLM_WINDOW = 0x14, NVME_FEAT_HOST_BEHAVIOR = 0x16, + NVME_FEAT_SANITIZE = 0x17, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -883,6 +920,10 @@ enum { NVME_LOG_FW_SLOT = 0x03, NVME_LOG_CHANGED_NS = 0x04, NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_DEVICE_SELF_TEST = 0x06, + NVME_LOG_TELEMETRY_HOST = 0x07, + NVME_LOG_TELEMETRY_CTRL = 0x08, + NVME_LOG_ENDURANCE_GROUP = 0x09, NVME_LOG_ANA = 0x0c, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, @@ -1290,7 +1331,11 @@ enum { NVME_SC_SGL_INVALID_OFFSET = 0x16, NVME_SC_SGL_INVALID_SUBTYPE = 0x17, + NVME_SC_SANITIZE_FAILED = 0x1C, + NVME_SC_SANITIZE_IN_PROGRESS = 0x1D, + NVME_SC_NS_WRITE_PROTECTED = 0x20, + NVME_SC_CMD_INTERRUPTED = 0x21, NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, @@ -1328,6 +1373,8 @@ enum { NVME_SC_NS_NOT_ATTACHED = 0x11a, NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, NVME_SC_CTRL_LIST_INVALID = 0x11c, + NVME_SC_BP_WRITE_PROHIBITED = 0x11e, + NVME_SC_PMR_SAN_PROHIBITED = 0x123, /* * I/O Command Set Specific - NVM commands: -- cgit v1.2.3 From 0c65b2b90d13c1deaee6449304dd367c5d4eb8ae Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 4 Nov 2019 02:40:33 +0100 Subject: net: of_get_phy_mode: Change API to solve int/unit warnings Before this change of_get_phy_mode() returned an enum, phy_interface_t. On error, -ENODEV etc, is returned. If the result of the function is stored in a variable of type phy_interface_t, and the compiler has decided to represent this as an unsigned int, comparision with -ENODEV etc, is a signed vs unsigned comparision. Fix this problem by changing the API. Make the function return an error, or 0 on success, and pass a pointer, of type phy_interface_t, where the phy mode should be stored. v2: Return with *interface set to PHY_INTERFACE_MODE_NA on error. Add error checks to all users of of_get_phy_mode() Fixup a few reverse christmas tree errors Fixup a few slightly malformed reverse christmas trees v3: Fix 0-day reported errors. Reported-by: Dan Carpenter Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/of_net.h | 7 +++++-- include/linux/stmmac.h | 3 ++- include/linux/sxgbe_platform.h | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 6aeaea1775e6..71bbfcf3adcd 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -6,15 +6,18 @@ #ifndef __LINUX_OF_NET_H #define __LINUX_OF_NET_H +#include + #ifdef CONFIG_OF_NET #include struct net_device; -extern int of_get_phy_mode(struct device_node *np); +extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface); extern const void *of_get_mac_address(struct device_node *np); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else -static inline int of_get_phy_mode(struct device_node *np) +static inline int of_get_phy_mode(struct device_node *np, + phy_interface_t *interface) { return -ENODEV; } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 86f9464c3f5d..d4bcd9387136 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -13,6 +13,7 @@ #define __STMMAC_PLATFORM_DATA #include +#include #define MTL_MAX_RX_QUEUES 8 #define MTL_MAX_TX_QUEUES 8 @@ -132,7 +133,7 @@ struct plat_stmmacenet_data { int bus_id; int phy_addr; int interface; - int phy_interface; + phy_interface_t phy_interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; struct device_node *phylink_node; diff --git a/include/linux/sxgbe_platform.h b/include/linux/sxgbe_platform.h index 267369110584..85ec745767bd 100644 --- a/include/linux/sxgbe_platform.h +++ b/include/linux/sxgbe_platform.h @@ -10,6 +10,8 @@ #ifndef __SXGBE_PLATFORM_H__ #define __SXGBE_PLATFORM_H__ +#include + /* MDC Clock Selection define*/ #define SXGBE_CSR_100_150M 0x0 /* MDC = clk_scr_i/62 */ #define SXGBE_CSR_150_250M 0x1 /* MDC = clk_scr_i/102 */ @@ -38,7 +40,7 @@ struct sxgbe_plat_data { char *phy_bus_name; int bus_id; int phy_addr; - int interface; + phy_interface_t interface; struct sxgbe_mdio_bus_data *mdio_bus_data; struct sxgbe_dma_cfg *dma_cfg; int clk_csr; -- cgit v1.2.3 From 205577ab6f7ade6185f764ed78fb6875dca40205 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 25 Oct 2019 19:08:36 +0100 Subject: iommu/io-pgtable-arm: Rationalise MAIR handling Between VMSAv8-64 and the various 32-bit formats, there is either one 64-bit MAIR or a pair of 32-bit MAIR0/MAIR1 or NMRR/PMRR registers. As such, keeping two 64-bit values in io_pgtable_cfg has always been overkill. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ec7a13405f10..ee21eedafe98 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -102,7 +102,7 @@ struct io_pgtable_cfg { struct { u64 ttbr[2]; u64 tcr; - u64 mair[2]; + u64 mair; } arm_lpae_s1_cfg; struct { -- cgit v1.2.3 From f188b5e76aae9f713c73708d2ba57b65953ce207 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Mon, 4 Nov 2019 11:12:38 -0700 Subject: coresight: etm4x: Save/restore state across CPU low power states Some hardware will ignore bit TRCPDCR.PU which is used to signal to hardware that power should not be removed from the trace unit. Let's mitigate against this by conditionally saving and restoring the trace unit state when the CPU enters low power states. This patchset introduces a firmware property named 'arm,coresight-loses-context-with-cpu' - when this is present the hardware state will be conditionally saved and restored. A module parameter 'pm_save_enable' is also introduced which can be configured to override the firmware property. This can be set to never allow save/restore or to conditionally allow it (only for self-hosted). The default value is determined by firmware. We avoid saving the hardware state when self-hosted coresight isn't in use to reduce PM latency - we can't determine this by reading the claim tags (TRCCLAIMCLR) as these are 'trace' registers which need power and clocking, something we can't easily provide in the PM context. Therefore we rely on the existing drvdata->mode internal state that is set when self-hosted coresight is used (and powered). Signed-off-by: Andrew Murray Reviewed-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20191104181251.26732-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index a2b68823717b..44e552de419c 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -285,6 +285,8 @@ extern void coresight_disclaim_device(void __iomem *base); extern void coresight_disclaim_device_unlocked(void __iomem *base); extern char *coresight_alloc_device_name(struct coresight_dev_list *devs, struct device *dev); + +extern bool coresight_loses_context_with_cpu(struct device *dev); #else static inline struct coresight_device * coresight_register(struct coresight_desc *desc) { return NULL; } @@ -307,6 +309,10 @@ static inline int coresight_claim_device(void __iomem *base) static inline void coresight_disclaim_device(void __iomem *base) {} static inline void coresight_disclaim_device_unlocked(void __iomem *base) {} +static inline bool coresight_loses_context_with_cpu(struct device *dev) +{ + return false; +} #endif extern int coresight_get_cpu(struct device *dev); -- cgit v1.2.3 From b8104fda1fff0882e43b7e98832a76d7e98eb3e9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 5 Nov 2019 01:22:16 +0800 Subject: logic_pio: Define PIO_INDIRECT_SIZE for !CONFIG_INDIRECT_PIO With the goal of expanding the test coverage of the HiSi LPC driver to !ARM64, define a dummy PIO_INDIRECT_SIZE for !CONFIG_INDIRECT_PIO, which is required by the named driver. Signed-off-by: John Garry Signed-off-by: Wei Xu --- include/linux/logic_pio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index 88e1e6304a71..54945aa824b4 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -108,10 +108,10 @@ void logic_outsl(unsigned long addr, const void *buffer, unsigned int count); * area by redefining the macro below. */ #define PIO_INDIRECT_SIZE 0x4000 -#define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE) #else -#define MMIO_UPPER_LIMIT IO_SPACE_LIMIT +#define PIO_INDIRECT_SIZE 0 #endif /* CONFIG_INDIRECT_PIO */ +#define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE) struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode); unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, -- cgit v1.2.3 From add3efdd78b8a0478ce423bb9d4df6bd95e8b335 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:07 +0100 Subject: jbd2: Fix possible overflow in jbd2_log_space_left() When number of free space in the journal is very low, the arithmetic in jbd2_log_space_left() could underflow resulting in very high number of free blocks and thus triggering assertion failure in transaction commit code complaining there's not enough space in the journal: J_ASSERT(journal->j_free > 1); Properly check for the low number of free blocks. CC: stable@vger.kernel.org Reviewed-by: Theodore Ts'o Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 603fbc4e2f70..10e6049c0ba9 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1582,7 +1582,7 @@ static inline int jbd2_space_needed(journal_t *journal) static inline unsigned long jbd2_log_space_left(journal_t *journal) { /* Allow for rounding errors */ - unsigned long free = journal->j_free - 32; + long free = journal->j_free - 32; if (journal->j_committing_transaction) { unsigned long committing = atomic_read(&journal-> @@ -1591,7 +1591,7 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) /* Transaction + control blocks */ free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); } - return free; + return max_t(long, free, 0); } /* -- cgit v1.2.3 From 8670b2b8b029a6650d133486be9d2ace146fd29a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 24 Oct 2019 19:40:42 +0200 Subject: rfkill: allocate static minor udev has a feature of creating /dev/ device-nodes if it finds a devnode: modalias. This allows for auto-loading of modules that provide the node. This requires to use a statically allocated minor number for misc character devices. However, rfkill uses dynamic minor numbers and prevents auto-loading of the module. So allocate the next static misc minor number and use it for rfkill. Signed-off-by: Marcel Holtmann Link: https://lore.kernel.org/r/20191024174042.19851-1-marcel@holtmann.org Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3247a3dc7934..b06b75776a32 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -57,6 +57,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define RFKILL_MINOR 242 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3 From b873af620e58863b70ae9cf97f6fab4cf4c544af Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 22 Oct 2019 10:43:13 +0200 Subject: lib: devres: provide devm_ioremap_resource_wc() Provide a variant of devm_ioremap_resource() for write-combined ioremap. Signed-off-by: Bartosz Golaszewski Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20191022084318.22256-4-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index d1bcc8f122f6..f05c5b92e61f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -962,6 +962,8 @@ extern void devm_free_pages(struct device *dev, unsigned long addr); void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res); +void __iomem *devm_ioremap_resource_wc(struct device *dev, + const struct resource *res); void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, -- cgit v1.2.3 From bb6243b4f73d29f8c8faf8f805a2042ac3973b71 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 22 Oct 2019 10:43:14 +0200 Subject: drivers: platform: provide devm_platform_ioremap_resource_wc() Provide a write-combined variant of devm_platform_ioremap_resource(). Signed-off-by: Bartosz Golaszewski Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20191022084318.22256-5-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 51fb7fc28587..91fcdbbae89d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -57,6 +57,9 @@ platform_find_device_by_driver(struct device *start, extern void __iomem * devm_platform_ioremap_resource(struct platform_device *pdev, unsigned int index); +extern void __iomem * +devm_platform_ioremap_resource_wc(struct platform_device *pdev, + unsigned int index); extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); extern int platform_irq_count(struct platform_device *); -- cgit v1.2.3 From c9c8641d3ebd79274af75f7df3e6a9c6cc8a66e9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 22 Oct 2019 10:43:16 +0200 Subject: drivers: provide devm_platform_ioremap_resource_byname() Provide a variant of devm_platform_ioremap_resource() that allows to lookup resources from platform devices by name rather than by index. Signed-off-by: Bartosz Golaszewski Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20191022084318.22256-7-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 91fcdbbae89d..276a03c24691 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -60,6 +60,9 @@ devm_platform_ioremap_resource(struct platform_device *pdev, extern void __iomem * devm_platform_ioremap_resource_wc(struct platform_device *pdev, unsigned int index); +extern void __iomem * +devm_platform_ioremap_resource_byname(struct platform_device *pdev, + const char *name); extern int platform_get_irq(struct platform_device *, unsigned int); extern int platform_get_irq_optional(struct platform_device *, unsigned int); extern int platform_irq_count(struct platform_device *); -- cgit v1.2.3 From 9b8303fc6efa724bd6a90656434fbde2cc6ceb2c Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 29 Oct 2019 11:42:31 +0000 Subject: nvmem: core: fix nvmem_cell_write inline function nvmem_cell_write's buf argument uses different types based on the configuration of CONFIG_NVMEM. The function prototype for enabled NVMEM uses 'void *' type, but the static dummy function for disabled NVMEM uses 'const char *' instead. Fix the different behaviour by always expecting a 'void *' typed buf argument. Fixes: 7a78a7f7695b ("power: reset: nvmem-reboot-mode: use NVMEM as reboot mode write interface") Reported-by: kbuild test robot Cc: Han Nandor Cc: Srinivas Kandagatla Cc: linux-kernel@vger.kernel.org Signed-off-by: Sebastian Reichel Reviewed-By: Han Nandor Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191029114240.14905-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 8f8be5b00060..5c17cb733224 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -118,7 +118,7 @@ static inline void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) } static inline int nvmem_cell_write(struct nvmem_cell *cell, - const char *buf, size_t len) + void *buf, size_t len) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 8adeac3be03d400f9c2391d52f85cd27bd188800 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 2 Oct 2019 14:03:41 -0500 Subject: dm stripe: use struct_size() in kmalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct stripe_c { ... struct stripe stripe[0]; }; In this case alloc_context() and dm_array_too_big() are removed and replaced by the direct use of the struct_size() helper in kmalloc(). Notice that open-coded form is prone to type mistakes. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 399ad8632356..2e13826898b2 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -594,9 +594,6 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); */ #define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz)) -#define dm_array_too_big(fixed, obj, num) \ - ((num) > (UINT_MAX - (fixed)) / (obj)) - /* * Sector offset taken relative to the start of the target instead of * relative to the start of the device. -- cgit v1.2.3 From a9a8344ee1714f835ba394077e8c13d751e2f148 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:17 +0100 Subject: ext4, jbd2: Provide accessor function for handle credits Provide accessor function to get number of credits available in a handle and use it from ext4. Later, computation of available credits won't be so straightforward. Reviewed-by: Theodore Ts'o Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-11-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 10e6049c0ba9..727ff91d7f3e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1645,6 +1645,12 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) return tid; } + +static inline int jbd2_handle_buffer_credits(handle_t *handle) +{ + return handle->h_buffer_credits; +} + #ifdef __KERNEL__ #define buffer_trace_init(bh) do {} while (0) -- cgit v1.2.3 From 9f356e5a4f12008fa0df8b6385fc0ab830416e72 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:24 +0100 Subject: jbd2: Account descriptor blocks into t_outstanding_credits Currently, journal descriptor blocks were not accounted in transaction->t_outstanding_credits and we were just leaving some slack space in the journal for them (in jbd2_log_space_left() and jbd2_space_needed()). This is making proper accounting (and reservation we want to add) of descriptor blocks difficult so switch to accounting descriptor blocks in transaction->t_outstanding_credits and just reserve the same amount of credits in t_outstanding credits for journal descriptor blocks when creating transaction. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-18-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 727ff91d7f3e..bef4f74b1ea0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -681,8 +681,10 @@ struct transaction_s atomic_t t_updates; /* - * Number of buffers reserved for use by all handles in this transaction - * handle but not yet modified. [none] + * Number of blocks reserved for this transaction in the journal. + * This is including all credits reserved when starting transaction + * handles as well as all journal descriptor blocks needed for this + * transaction. [none] */ atomic_t t_outstanding_credits; @@ -1560,20 +1562,13 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) return journal->j_chksum_driver != NULL; } -/* - * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for - * transaction control blocks. - */ -#define JBD2_CONTROL_BLOCKS_SHIFT 5 - /* * Return the minimum number of blocks which must be free in the journal * before a new transaction may be started. Must be called under j_state_lock. */ static inline int jbd2_space_needed(journal_t *journal) { - int nblocks = journal->j_max_transaction_buffers; - return nblocks + (nblocks >> JBD2_CONTROL_BLOCKS_SHIFT); + return journal->j_max_transaction_buffers; } /* @@ -1585,11 +1580,8 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) long free = journal->j_free - 32; if (journal->j_committing_transaction) { - unsigned long committing = atomic_read(&journal-> - j_committing_transaction->t_outstanding_credits); - - /* Transaction + control blocks */ - free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); + free -= atomic_read(&journal-> + j_committing_transaction->t_outstanding_credits); } return max_t(long, free, 0); } -- cgit v1.2.3 From 77444ac4f9537bc4211f928959d5231445e30c6e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:25 +0100 Subject: jbd2: Drop jbd2_space_needed() The function is now just a trivial wrapper returning journal->j_max_transaction_buffers. Drop it. Reviewed-by: Theodore Ts'o Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-19-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index bef4f74b1ea0..1dd2703a8e26 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1562,15 +1562,6 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) return journal->j_chksum_driver != NULL; } -/* - * Return the minimum number of blocks which must be free in the journal - * before a new transaction may be started. Must be called under j_state_lock. - */ -static inline int jbd2_space_needed(journal_t *journal) -{ - return journal->j_max_transaction_buffers; -} - /* * Return number of free blocks in the log. Must be called under j_state_lock. */ -- cgit v1.2.3 From fdc3ef882a5d59c1709a13b5486ae2b1632e12b6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:26 +0100 Subject: jbd2: Reserve space for revoke descriptor blocks Extend functions for starting, extending, and restarting transaction handles to take number of revoke records handle must be able to accommodate. These functions then make sure transaction has enough credits to be able to store resulting revoke descriptor blocks. Also revoke code tracks number of revoke records created by a handle to catch situation where some place didn't reserve enough space for revoke records. Similarly to standard transaction credits, space for unused reserved revoke records is released when the handle is stopped. On the ext4 side we currently take a simplistic approach of reserving space for 1024 revoke records for any transaction. This grows amount of credits reserved for each handle only by a few and is enough for any normal workload so that we don't hit warnings in jbd2. We will refine the logic in following commits. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-20-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1dd2703a8e26..2a3d5f50e7a1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -478,6 +478,7 @@ struct jbd2_revoke_table_s; * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. + * @h_revoke_credits: Number of remaining revoke records available for handle * @h_ref: Reference count on this handle. * @h_err: Field for caller's use to track errors through large fs operations. * @h_sync: Flag for sync-on-close. @@ -488,6 +489,7 @@ struct jbd2_revoke_table_s; * @h_line_no: For handle statistics. * @h_start_jiffies: Handle Start time. * @h_requested_credits: Holds @h_buffer_credits after handle is started. + * @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started. * @saved_alloc_context: Saved context while transaction is open. **/ @@ -505,6 +507,8 @@ struct jbd2_journal_handle handle_t *h_rsv_handle; int h_buffer_credits; + int h_revoke_credits; + int h_revoke_credits_requested; int h_ref; int h_err; @@ -688,6 +692,17 @@ struct transaction_s */ atomic_t t_outstanding_credits; + /* + * Number of revoke records for this transaction added by already + * stopped handles. [none] + */ + atomic_t t_outstanding_revokes; + + /* + * How many handles used this transaction? [none] + */ + atomic_t t_handle_count; + /* * Forward and backward links for the circular list of all transactions * awaiting checkpoint. [j_list_lock] @@ -705,11 +720,6 @@ struct transaction_s */ ktime_t t_start_time; - /* - * How many handles used this transaction? [none] - */ - atomic_t t_handle_count; - /* * This transaction is being forced and some process is * waiting for it to finish. @@ -1026,6 +1036,13 @@ struct journal_s */ int j_max_transaction_buffers; + /** + * @j_revoke_records_per_block: + * + * Number of revoke records that fit in one descriptor block. + */ + int j_revoke_records_per_block; + /** * @j_commit_interval: * @@ -1360,14 +1377,16 @@ static inline handle_t *journal_current_handle(void) extern handle_t *jbd2_journal_start(journal_t *, int nblocks); extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks, - gfp_t gfp_mask, unsigned int type, - unsigned int line_no); + int revoke_records, gfp_t gfp_mask, + unsigned int type, unsigned int line_no); extern int jbd2_journal_restart(handle_t *, int nblocks); -extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask); +extern int jbd2__journal_restart(handle_t *, int nblocks, + int revoke_records, gfp_t gfp_mask); extern int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, unsigned int line_no); extern void jbd2_journal_free_reserved(handle_t *handle); -extern int jbd2_journal_extend (handle_t *, int nblocks); +extern int jbd2_journal_extend(handle_t *handle, int nblocks, + int revoke_records); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); @@ -1631,7 +1650,11 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) static inline int jbd2_handle_buffer_credits(handle_t *handle) { - return handle->h_buffer_credits; + journal_t *journal = handle->h_transaction->t_journal; + + return handle->h_buffer_credits - + DIV_ROUND_UP(handle->h_revoke_credits_requested, + journal->j_revoke_records_per_block); } #ifdef __KERNEL__ -- cgit v1.2.3 From 933f1c1e0b75bbc29730eef07c9e196c6dfd37e5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 5 Nov 2019 17:44:27 +0100 Subject: jbd2: Rename h_buffer_credits to h_total_credits The credit counter now contains both buffer and revoke descriptor block credits. Rename to counter to h_total_credits to reflect that. No functional change. Reviewed-by: Theodore Ts'o Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191105164437.32602-21-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2a3d5f50e7a1..3115eeb44039 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -477,7 +477,8 @@ struct jbd2_revoke_table_s; * @h_transaction: Which compound transaction is this update a part of? * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. - * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. + * @h_total_credits: Number of remaining buffers we are allowed to add to + journal. These are dirty buffers and revoke descriptor blocks. * @h_revoke_credits: Number of remaining revoke records available for handle * @h_ref: Reference count on this handle. * @h_err: Field for caller's use to track errors through large fs operations. @@ -488,7 +489,7 @@ struct jbd2_revoke_table_s; * @h_type: For handle statistics. * @h_line_no: For handle statistics. * @h_start_jiffies: Handle Start time. - * @h_requested_credits: Holds @h_buffer_credits after handle is started. + * @h_requested_credits: Holds @h_total_credits after handle is started. * @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started. * @saved_alloc_context: Saved context while transaction is open. **/ @@ -506,7 +507,7 @@ struct jbd2_journal_handle }; handle_t *h_rsv_handle; - int h_buffer_credits; + int h_total_credits; int h_revoke_credits; int h_revoke_credits_requested; int h_ref; @@ -1652,7 +1653,7 @@ static inline int jbd2_handle_buffer_credits(handle_t *handle) { journal_t *journal = handle->h_transaction->t_journal; - return handle->h_buffer_credits - + return handle->h_total_credits - DIV_ROUND_UP(handle->h_revoke_credits_requested, journal->j_revoke_records_per_block); } -- cgit v1.2.3 From 15122464d525f684a61806d28597050cdcef0f32 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 2 Nov 2019 01:12:03 +0100 Subject: icmp: add helpers to recognize ICMP error packets Add two helper functions, one for IPv4 and one for IPv6, to recognize the ICMP packets which are error responses. This packets are special because they have as payload the original header of the packet which generated it (RFC 792 says at least 8 bytes, but Linux actually includes much more than that). Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- include/linux/icmp.h | 15 +++++++++++++++ include/linux/icmpv6.h | 14 ++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/icmp.h b/include/linux/icmp.h index 2d8aaf7d4b9e..81ca84ce3119 100644 --- a/include/linux/icmp.h +++ b/include/linux/icmp.h @@ -20,4 +20,19 @@ static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb) { return (struct icmphdr *)skb_transport_header(skb); } + +static inline bool icmp_is_err(int type) +{ + switch (type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + return true; + } + + return false; +} + #endif /* _LINUX_ICMP_H */ diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index a8f888976137..ef1cbb5f454f 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -46,4 +46,18 @@ extern void icmpv6_flow_init(struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif); + +static inline bool icmpv6_is_err(int type) +{ + switch (type) { + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + return true; + } + + return false; +} + #endif -- cgit v1.2.3 From a00351687f8a05773c1c57be80a5bbca68fa9ae8 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Oct 2019 13:02:19 -0700 Subject: software node: remove DEV_PROP_MAX This definition is not used anywhere, let's remove it. Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 054661109661..0a075fbde57e 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -22,7 +22,6 @@ enum dev_prop_type { DEV_PROP_U32, DEV_PROP_U64, DEV_PROP_STRING, - DEV_PROP_MAX, }; enum dev_dma_attr { -- cgit v1.2.3 From 1741cfacfa9ba047b3f2244fbe6e865602e70ddb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Oct 2019 13:02:20 -0700 Subject: software node: introduce PROPERTY_ENTRY_XXX_ARRAY_LEN() Sometimes we want to initialize property entry array from a regular pointer, when we can't determine length automatically via ARRAY_SIZE. Let's introduce PROPERTY_ENTRY_XXX_ARRAY_LEN macros that take explicit "len" argument. Reviewed-by: Andy Shevchenko Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 0a075fbde57e..fad2e83a207c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -261,33 +261,44 @@ struct property_entry { * and structs. */ -#define PROPERTY_ENTRY_INTEGER_ARRAY(_name_, _type_, _Type_, _val_) \ +#define PROPERTY_ENTRY_ARRAY_LEN(_name_, _type_, _Type_, _val_, _len_) \ (struct property_entry) { \ .name = _name_, \ - .length = ARRAY_SIZE(_val_) * sizeof(_type_), \ + .length = (_len_) * sizeof(_type_), \ .is_array = true, \ .type = DEV_PROP_##_Type_, \ { .pointer = { ._type_##_data = _val_ } }, \ } -#define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u8, U8, _val_) -#define PROPERTY_ENTRY_U16_ARRAY(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u16, U16, _val_) -#define PROPERTY_ENTRY_U32_ARRAY(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u32, U32, _val_) -#define PROPERTY_ENTRY_U64_ARRAY(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u64, U64, _val_) +#define PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, _len_) \ + PROPERTY_ENTRY_ARRAY_LEN(_name_, u8, U8, _val_, _len_) +#define PROPERTY_ENTRY_U16_ARRAY_LEN(_name_, _val_, _len_) \ + PROPERTY_ENTRY_ARRAY_LEN(_name_, u16, U16, _val_, _len_) +#define PROPERTY_ENTRY_U32_ARRAY_LEN(_name_, _val_, _len_) \ + PROPERTY_ENTRY_ARRAY_LEN(_name_, u32, U32, _val_, _len_) +#define PROPERTY_ENTRY_U64_ARRAY_LEN(_name_, _val_, _len_) \ + PROPERTY_ENTRY_ARRAY_LEN(_name_, u64, U64, _val_, _len_) -#define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_) \ -(struct property_entry) { \ - .name = _name_, \ - .length = ARRAY_SIZE(_val_) * sizeof(const char *), \ - .is_array = true, \ - .type = DEV_PROP_STRING, \ - { .pointer = { .str = _val_ } }, \ +#define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_) \ +(struct property_entry) { \ + .name = _name_, \ + .length = (_len_) * sizeof(const char *), \ + .is_array = true, \ + .type = DEV_PROP_STRING, \ + { .pointer = { .str = _val_ } }, \ } +#define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ + PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) +#define PROPERTY_ENTRY_U16_ARRAY(_name_, _val_) \ + PROPERTY_ENTRY_U16_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) +#define PROPERTY_ENTRY_U32_ARRAY(_name_, _val_) \ + PROPERTY_ENTRY_U32_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) +#define PROPERTY_ENTRY_U64_ARRAY(_name_, _val_) \ + PROPERTY_ENTRY_U64_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) +#define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_) \ + PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) + #define PROPERTY_ENTRY_INTEGER(_name_, _type_, _Type_, _val_) \ (struct property_entry) { \ .name = _name_, \ -- cgit v1.2.3 From b871160fbc61d8cd43440b3903d402e90e28e321 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Oct 2019 13:02:22 -0700 Subject: software node: mark internal macros with double underscores Let's mark PROPERTY_ENTRY_* macros that are internal with double leading underscores so users are not tempted to use them. Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index fad2e83a207c..d6019bacd848 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -261,7 +261,7 @@ struct property_entry { * and structs. */ -#define PROPERTY_ENTRY_ARRAY_LEN(_name_, _type_, _Type_, _val_, _len_) \ +#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _type_, _Type_, _val_, _len_)\ (struct property_entry) { \ .name = _name_, \ .length = (_len_) * sizeof(_type_), \ @@ -271,13 +271,13 @@ struct property_entry { } #define PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, _len_) \ - PROPERTY_ENTRY_ARRAY_LEN(_name_, u8, U8, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u8, U8, _val_, _len_) #define PROPERTY_ENTRY_U16_ARRAY_LEN(_name_, _val_, _len_) \ - PROPERTY_ENTRY_ARRAY_LEN(_name_, u16, U16, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u16, U16, _val_, _len_) #define PROPERTY_ENTRY_U32_ARRAY_LEN(_name_, _val_, _len_) \ - PROPERTY_ENTRY_ARRAY_LEN(_name_, u32, U32, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u32, U32, _val_, _len_) #define PROPERTY_ENTRY_U64_ARRAY_LEN(_name_, _val_, _len_) \ - PROPERTY_ENTRY_ARRAY_LEN(_name_, u64, U64, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u64, U64, _val_, _len_) #define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_) \ (struct property_entry) { \ @@ -299,7 +299,7 @@ struct property_entry { #define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_) \ PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) -#define PROPERTY_ENTRY_INTEGER(_name_, _type_, _Type_, _val_) \ +#define __PROPERTY_ENTRY_INTEGER(_name_, _type_, _Type_, _val_) \ (struct property_entry) { \ .name = _name_, \ .length = sizeof(_type_), \ @@ -308,13 +308,13 @@ struct property_entry { } #define PROPERTY_ENTRY_U8(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER(_name_, u8, U8, _val_) + __PROPERTY_ENTRY_INTEGER(_name_, u8, U8, _val_) #define PROPERTY_ENTRY_U16(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER(_name_, u16, U16, _val_) + __PROPERTY_ENTRY_INTEGER(_name_, u16, U16, _val_) #define PROPERTY_ENTRY_U32(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER(_name_, u32, U32, _val_) + __PROPERTY_ENTRY_INTEGER(_name_, u32, U32, _val_) #define PROPERTY_ENTRY_U64(_name_, _val_) \ - PROPERTY_ENTRY_INTEGER(_name_, u64, U64, _val_) + __PROPERTY_ENTRY_INTEGER(_name_, u64, U64, _val_) #define PROPERTY_ENTRY_STRING(_name_, _val_) \ (struct property_entry) { \ -- cgit v1.2.3 From 1f74d70ff21249670eb68c1344e0687aa909861d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Oct 2019 13:02:24 -0700 Subject: software node: get rid of property_set_pointer() Instead of explicitly setting values of integer types when copying property entries lets just copy entire value union when processing non-array values. For value arrays we no longer use union of pointers, but rather a single void pointer, which allows us to remove property_set_pointer(). In property_get_pointer() we do not need to handle each data type separately, we can simply return either the pointer or pointer to values union. We are not losing anything from removing typed pointer union because the upper layers do their accesses through void pointers anyway, and we trust the "type" of the property when interpret the data. We rely on users of property entries on using PROPERTY_ENTRY_XXX() macros to properly initialize entries instead of poking in the instances directly. Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index d6019bacd848..12eff7cbb395 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -238,13 +238,7 @@ struct property_entry { bool is_array; enum dev_prop_type type; union { - union { - const u8 *u8_data; - const u16 *u16_data; - const u32 *u32_data; - const u64 *u64_data; - const char * const *str; - } pointer; + const void *pointer; union { u8 u8_data; u16 u16_data; @@ -267,7 +261,7 @@ struct property_entry { .length = (_len_) * sizeof(_type_), \ .is_array = true, \ .type = DEV_PROP_##_Type_, \ - { .pointer = { ._type_##_data = _val_ } }, \ + { .pointer = _val_ }, \ } #define PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, _len_) \ @@ -285,7 +279,7 @@ struct property_entry { .length = (_len_) * sizeof(const char *), \ .is_array = true, \ .type = DEV_PROP_STRING, \ - { .pointer = { .str = _val_ } }, \ + { .pointer = _val_ }, \ } #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ -- cgit v1.2.3 From daeba9bf62e6d03667915899af48471cdf26fde4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 23 Oct 2019 13:02:26 -0700 Subject: software node: unify PROPERTY_ENTRY_XXX macros We can unify string properties initializer macros with integer initializers. Signed-off-by: Dmitry Torokhov Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 64 ++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 12eff7cbb395..48335288c2a9 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -250,37 +250,33 @@ struct property_entry { }; /* - * Note: the below four initializers for the anonymous union are carefully + * Note: the below initializers for the anonymous union are carefully * crafted to avoid gcc-4.4.4's problems with initialization of anon unions * and structs. */ -#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _type_, _Type_, _val_, _len_)\ +#define __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_) \ + sizeof(((struct property_entry *)NULL)->value._elem_) + +#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _elem_, _Type_, _val_, _len_)\ (struct property_entry) { \ .name = _name_, \ - .length = (_len_) * sizeof(_type_), \ + .length = (_len_) * __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_), \ .is_array = true, \ .type = DEV_PROP_##_Type_, \ { .pointer = _val_ }, \ } #define PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, _len_) \ - __PROPERTY_ENTRY_ARRAY_LEN(_name_, u8, U8, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u8_data, U8, _val_, _len_) #define PROPERTY_ENTRY_U16_ARRAY_LEN(_name_, _val_, _len_) \ - __PROPERTY_ENTRY_ARRAY_LEN(_name_, u16, U16, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u16_data, U16, _val_, _len_) #define PROPERTY_ENTRY_U32_ARRAY_LEN(_name_, _val_, _len_) \ - __PROPERTY_ENTRY_ARRAY_LEN(_name_, u32, U32, _val_, _len_) + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u32_data, U32, _val_, _len_) #define PROPERTY_ENTRY_U64_ARRAY_LEN(_name_, _val_, _len_) \ - __PROPERTY_ENTRY_ARRAY_LEN(_name_, u64, U64, _val_, _len_) - + __PROPERTY_ENTRY_ARRAY_LEN(_name_, u64_data, U64, _val_, _len_) #define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_) \ -(struct property_entry) { \ - .name = _name_, \ - .length = (_len_) * sizeof(const char *), \ - .is_array = true, \ - .type = DEV_PROP_STRING, \ - { .pointer = _val_ }, \ -} + __PROPERTY_ENTRY_ARRAY_LEN(_name_, str, STRING, _val_, _len_) #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_) \ PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) @@ -293,30 +289,24 @@ struct property_entry { #define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_) \ PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_)) -#define __PROPERTY_ENTRY_INTEGER(_name_, _type_, _Type_, _val_) \ -(struct property_entry) { \ - .name = _name_, \ - .length = sizeof(_type_), \ - .type = DEV_PROP_##_Type_, \ - { .value = { ._type_##_data = _val_ } }, \ +#define __PROPERTY_ENTRY_ELEMENT(_name_, _elem_, _Type_, _val_) \ +(struct property_entry) { \ + .name = _name_, \ + .length = __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_), \ + .type = DEV_PROP_##_Type_, \ + { .value = { ._elem_ = _val_ } }, \ } -#define PROPERTY_ENTRY_U8(_name_, _val_) \ - __PROPERTY_ENTRY_INTEGER(_name_, u8, U8, _val_) -#define PROPERTY_ENTRY_U16(_name_, _val_) \ - __PROPERTY_ENTRY_INTEGER(_name_, u16, U16, _val_) -#define PROPERTY_ENTRY_U32(_name_, _val_) \ - __PROPERTY_ENTRY_INTEGER(_name_, u32, U32, _val_) -#define PROPERTY_ENTRY_U64(_name_, _val_) \ - __PROPERTY_ENTRY_INTEGER(_name_, u64, U64, _val_) - -#define PROPERTY_ENTRY_STRING(_name_, _val_) \ -(struct property_entry) { \ - .name = _name_, \ - .length = sizeof(const char *), \ - .type = DEV_PROP_STRING, \ - { .value = { .str = _val_ } }, \ -} +#define PROPERTY_ENTRY_U8(_name_, _val_) \ + __PROPERTY_ENTRY_ELEMENT(_name_, u8_data, U8, _val_) +#define PROPERTY_ENTRY_U16(_name_, _val_) \ + __PROPERTY_ENTRY_ELEMENT(_name_, u16_data, U16, _val_) +#define PROPERTY_ENTRY_U32(_name_, _val_) \ + __PROPERTY_ENTRY_ELEMENT(_name_, u32_data, U32, _val_) +#define PROPERTY_ENTRY_U64(_name_, _val_) \ + __PROPERTY_ENTRY_ELEMENT(_name_, u64_data, U64, _val_) +#define PROPERTY_ENTRY_STRING(_name_, _val_) \ + __PROPERTY_ENTRY_ELEMENT(_name_, str, STRING, _val_) #define PROPERTY_ENTRY_BOOL(_name_) \ (struct property_entry) { \ -- cgit v1.2.3 From ea81bae46032022656d45d4a395f1bf3b96697f0 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 1 Oct 2019 10:37:33 +0200 Subject: mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caller needs to make sure that the vma is not torn down during the lock operation and can also use the i_mmap_rwsem for file-backed vmas. Remove the BUG_ON. We could, as an alternative, add a test that either vma->vm_mm->mmap_sem or vma->vm_file->f_mapping->i_mmap_rwsem are held. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Acked-by: Kirill A. Shutemov --- include/linux/huge_mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 93d5cf0bc716..0b84e13e88e2 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -216,7 +216,6 @@ static inline int is_swap_pmd(pmd_t pmd) static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else @@ -225,7 +224,6 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { - VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); if (pud_trans_huge(*pud) || pud_devmap(*pud)) return __pud_trans_huge_lock(pud, vma); else -- cgit v1.2.3 From ecaad8aca20432fa60821282d8ff479629c9f7b9 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 1 Oct 2019 11:17:34 +0200 Subject: mm: Add a walk_page_mapping() function to the pagewalk code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For users that want to travers all page table entries pointing into a region of a struct address_space mapping, introduce a walk_page_mapping() function. The walk_page_mapping() function will be initially be used for dirty- tracking in virtual graphics drivers. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Reviewed-by: Andrew Morton --- include/linux/pagewalk.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index bddd9759bab9..6ec82e92c87f 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -24,6 +24,9 @@ struct mm_walk; * "do page table walk over the current vma", returning * a negative value means "abort current page table walk * right now" and returning 1 means "skip the current vma" + * @pre_vma: if set, called before starting walk on a non-null vma. + * @post_vma: if set, called after a walk on a non-null vma, provided + * that @pre_vma and the vma walk succeeded. */ struct mm_walk_ops { int (*pud_entry)(pud_t *pud, unsigned long addr, @@ -39,6 +42,9 @@ struct mm_walk_ops { struct mm_walk *walk); int (*test_walk)(unsigned long addr, unsigned long next, struct mm_walk *walk); + int (*pre_vma)(unsigned long start, unsigned long end, + struct mm_walk *walk); + void (*post_vma)(struct mm_walk *walk); }; /** @@ -62,5 +68,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, void *private); int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private); +int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, + pgoff_t nr, const struct mm_walk_ops *ops, + void *private); #endif /* _LINUX_PAGEWALK_H */ -- cgit v1.2.3 From c5acad84cf1e33ca1a50984952e1c5b2caa0e13f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 19 Mar 2019 13:12:30 +0100 Subject: mm: Add write-protect and clean utilities for address space ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two utilities to 1) write-protect and 2) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Acked-by: Andrew Morton --- include/linux/mm.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index cc292273e6ba..4bc93477375e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2637,7 +2637,6 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); - #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); @@ -2878,5 +2877,17 @@ static inline int pages_identical(struct page *page1, struct page *page2) return !memcmp_pages(page1, page2); } +#ifdef CONFIG_MAPPING_DIRTY_HELPERS +unsigned long clean_record_shared_mapping_range(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr, + pgoff_t bitmap_pgoff, + unsigned long *bitmap, + pgoff_t *start, + pgoff_t *end); + +unsigned long wp_shared_mapping_range(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr); +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From 99e98d3fb1008ef7416e16a1fd355cb73a253502 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 4 Nov 2019 12:16:17 +0100 Subject: cpuidle: Consolidate disabled state checks There are two reasons why CPU idle states may be disabled: either because the driver has disabled them or because they have been disabled by user space via sysfs. In the former case, the state's "disabled" flag is set once during the initialization of the driver and it is never cleared later (it is read-only effectively). In the latter case, the "disable" field of the given state's cpuidle_state_usage struct is set and it may be changed via sysfs. Thus checking whether or not an idle state has been disabled involves reading these two flags every time. In order to avoid the additional check of the state's "disabled" flag (which is effectively read-only anyway), use the value of it at the init time to set a (new) flag in the "disable" field of that state's cpuidle_state_usage structure and use the sysfs interface to manipulate another (new) flag in it. This way the state is disabled whenever the "disable" field of its cpuidle_state_usage structure is nonzero, whatever the reason, and it is the only place to look into to check whether or not the state has been disabled. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Acked-by: Peter Zijlstra (Intel) --- include/linux/cpuidle.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4b6b5bea8f79..d23a3b1ddcf6 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -29,6 +29,9 @@ struct cpuidle_driver; * CPUIDLE DEVICE INTERFACE * ****************************/ +#define CPUIDLE_STATE_DISABLED_BY_USER BIT(0) +#define CPUIDLE_STATE_DISABLED_BY_DRIVER BIT(1) + struct cpuidle_state_usage { unsigned long long disable; unsigned long long usage; -- cgit v1.2.3 From 634d811c619b0dbe16dc890a53d2c978e9d055d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Nov 2019 14:59:02 -0500 Subject: nfsv4: Move NFSPROC4_CLNT_COPY_NOTIFY to end of list We shouldn't insert things into the NFSPROC4_CLNT enums, since that causes the nfsstat array to be reordered. Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5e7a5261af4e..82d8fb422092 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -537,10 +537,11 @@ enum { NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_OFFLOAD_CANCEL, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, + + NFSPROC4_CLNT_COPY_NOTIFY, }; /* nfs41 types */ -- cgit v1.2.3 From fbf6c73c5b264c25484fa9f449b5546569fe11f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Oct 2019 17:51:10 +0100 Subject: ftrace: add ftrace_init_nop() Architectures may need to perform special initialization of ftrace callsites, and today they do so by special-casing ftrace_make_nop() when the expected branch address is MCOUNT_ADDR. In some cases (e.g. for patchable-function-entry), we don't have an mcount-like symbol and don't want a synthetic MCOUNT_ADDR, but we may need to perform some initialization of callsites. To make it possible to separate initialization from runtime modification, and to handle cases without an mcount-like symbol, this patch adds an optional ftrace_init_nop() function that architectures can implement, which does not pass a branch address. Where an architecture does not provide ftrace_init_nop(), we will fall back to the existing behaviour of calling ftrace_make_nop() with MCOUNT_ADDR. At the same time, ftrace_code_disable() is renamed to ftrace_nop_initialize() to make it clearer that it is intended to intialize a callsite into a disabled state, and is not for disabling a callsite that has been runtime enabled. The kerneldoc description of rec arguments is updated to cover non-mcount callsites. Signed-off-by: Mark Rutland Reviewed-by: Amit Daniel Kachhap Reviewed-by: Ard Biesheuvel Reviewed-by: Miroslav Benes Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Sven Schnelle Tested-by: Torsten Duwe Cc: Ingo Molnar --- include/linux/ftrace.h | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8a8cb3c401b2..9867d90d635e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -499,7 +499,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } /** * ftrace_make_nop - convert code into nop * @mod: module structure if called by module load initialization - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @addr: the address that the call site should be calling * * This is a very sensitive operation and great care needs @@ -520,9 +520,38 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } extern int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr); + +/** + * ftrace_init_nop - initialize a nop call site + * @mod: module structure if called by module load initialization + * @rec: the call site record (e.g. mcount/fentry) + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should contain the contents created by + * the compiler + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +#ifndef ftrace_init_nop +static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + return ftrace_make_nop(mod, rec, MCOUNT_ADDR); +} +#endif + /** * ftrace_make_call - convert a nop call site into a call to addr - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs @@ -545,7 +574,7 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /** * ftrace_modify_call - convert from one addr to another (no nop) - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @old_addr: the address expected to be currently called to * @addr: the address to change to * -- cgit v1.2.3 From a1326b17ac03a9012cb3d01e434aacb4d67a416c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Oct 2019 18:17:11 +0100 Subject: module/ftrace: handle patchable-function-entry When using patchable-function-entry, the compiler will record the callsites into a section named "__patchable_function_entries" rather than "__mcount_loc". Let's abstract this difference behind a new FTRACE_CALLSITE_SECTION, so that architectures don't have to handle this explicitly (e.g. with custom module linker scripts). As parisc currently handles this explicitly, it is fixed up accordingly, with its custom linker script removed. Since FTRACE_CALLSITE_SECTION is only defined when DYNAMIC_FTRACE is selected, the parisc module loading code is updated to only use the definition in that case. When DYNAMIC_FTRACE is not selected, modules shouldn't have this section, so this removes some redundant work in that case. To make sure that this is keep up-to-date for modules and the main kernel, a comment is added to vmlinux.lds.h, with the existing ifdeffery simplified for legibility. I built parisc generic-{32,64}bit_defconfig with DYNAMIC_FTRACE enabled, and verified that the section made it into the .ko files for modules. Signed-off-by: Mark Rutland Acked-by: Helge Deller Acked-by: Steven Rostedt (VMware) Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Sven Schnelle Tested-by: Torsten Duwe Cc: Ingo Molnar Cc: James E.J. Bottomley Cc: Jessica Yu Cc: linux-parisc@vger.kernel.org --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9867d90d635e..9141f2263286 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -738,6 +738,11 @@ static inline unsigned long get_lock_parent_ip(void) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY +#define FTRACE_CALLSITE_SECTION "__patchable_function_entries" +#else +#define FTRACE_CALLSITE_SECTION "__mcount_loc" +#endif #else static inline void ftrace_init(void) { } #endif -- cgit v1.2.3 From b103fb7653fff09e7a6fb6ba9398a41584e7ae36 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Oct 2019 14:54:36 -0700 Subject: fscrypt: add support for IV_INO_LBLK_64 policies Inline encryption hardware compliant with the UFS v2.1 standard or with the upcoming version of the eMMC standard has the following properties: (1) Per I/O request, the encryption key is specified by a previously loaded keyslot. There might be only a small number of keyslots. (2) Per I/O request, the starting IV is specified by a 64-bit "data unit number" (DUN). IV bits 64-127 are assumed to be 0. The hardware automatically increments the DUN for each "data unit" of configurable size in the request, e.g. for each filesystem block. Property (1) makes it inefficient to use the traditional fscrypt per-file keys. Property (2) precludes the use of the existing DIRECT_KEY fscrypt policy flag, which needs at least 192 IV bits. Therefore, add a new fscrypt policy flag IV_INO_LBLK_64 which causes the encryption to modified as follows: - The encryption keys are derived from the master key, encryption mode number, and filesystem UUID. - The IVs are chosen as (inode_number << 32) | file_logical_block_num. For filenames encryption, file_logical_block_num is 0. Since the file nonces aren't used in the key derivation, many files may share the same encryption key. This is much more efficient on the target hardware. Including the inode number in the IVs and mixing the filesystem UUID into the keys ensures that data in different files is nevertheless still encrypted differently. Additionally, limiting the inode and block numbers to 32 bits and placing the block number in the low bits maintains compatibility with the 64-bit DUN convention (property (2) above). Since this scheme assumes that inode numbers are stable (which may preclude filesystem shrinking) and that inode and file logical block numbers are at most 32-bit, IV_INO_LBLK_64 will only be allowed on filesystems that meet these constraints. These are acceptable limitations for the cases where this format would actually be used. Note that IV_INO_LBLK_64 is an on-disk format, not an implementation. This patch just adds support for it using the existing filesystem layer encryption. A later patch will add support for inline encryption. Reviewed-by: Paul Crowley Co-developed-by: Satya Tangirala Signed-off-by: Satya Tangirala Signed-off-by: Eric Biggers --- include/linux/fscrypt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 04f5ed628445..1a7bffe78ed5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -61,6 +61,9 @@ struct fscrypt_operations { bool (*dummy_context)(struct inode *); bool (*empty_dir)(struct inode *); unsigned int max_namelen; + bool (*has_stable_inodes)(struct super_block *sb); + void (*get_ino_and_lblk_bits)(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret); }; static inline bool fscrypt_has_encryption_key(const struct inode *inode) -- cgit v1.2.3 From 1bb5ec2eec48dcab1d8ae3707e4a388da6a9c9dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 6 Nov 2019 12:49:57 -0800 Subject: cgroup: use cgroup->last_bstat instead of cgroup->bstat_pending for consistency cgroup->bstat_pending is used to determine the base stat delta to propagate to the parent. While correct, this is different from how percpu delta is determined for no good reason and the inconsistency makes the code more difficult to understand. This patch makes parent propagation delta calculation use the same method as percpu to global propagation. * cgroup_base_stat_accumulate() is renamed to cgroup_base_stat_add() and cgroup_base_stat_sub() is added. * percpu propagation calculation is updated to use the above helpers. * cgroup->bstat_pending is replaced with cgroup->last_bstat and updated to use the same calculation as percpu propagation. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 430e219e3aba..4904b1ebd1ff 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -458,7 +458,7 @@ struct cgroup { struct list_head rstat_css_list; /* cgroup basic resource statistics */ - struct cgroup_base_stat pending_bstat; /* pending from children */ + struct cgroup_base_stat last_bstat; struct cgroup_base_stat bstat; struct prev_cputime prev_cputime; /* for printing out cputime */ -- cgit v1.2.3 From 56144737e67329c9aaed15f942d46a6302e2e3d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2019 09:48:04 -0800 Subject: hrtimer: Annotate lockless access to timer->state syzbot reported various data-race caused by hrtimer_is_queued() reading timer->state. A READ_ONCE() is required there to silence the warning. Also add the corresponding WRITE_ONCE() when timer->state is set. In remove_hrtimer() the hrtimer_is_queued() helper is open coded to avoid loading timer->state twice. KCSAN reported these cases: BUG: KCSAN: data-race in __remove_hrtimer / tcp_pacing_check write to 0xffff8880b2a7d388 of 1 bytes by interrupt on cpu 0: __remove_hrtimer+0x52/0x130 kernel/time/hrtimer.c:991 __run_hrtimer kernel/time/hrtimer.c:1496 [inline] __hrtimer_run_queues+0x250/0x600 kernel/time/hrtimer.c:1576 hrtimer_run_softirq+0x10e/0x150 kernel/time/hrtimer.c:1593 __do_softirq+0x115/0x33f kernel/softirq.c:292 run_ksoftirqd+0x46/0x60 kernel/softirq.c:603 smpboot_thread_fn+0x37d/0x4a0 kernel/smpboot.c:165 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 read to 0xffff8880b2a7d388 of 1 bytes by task 24652 on cpu 1: tcp_pacing_check net/ipv4/tcp_output.c:2235 [inline] tcp_pacing_check+0xba/0x130 net/ipv4/tcp_output.c:2225 tcp_xmit_retransmit_queue+0x32c/0x5a0 net/ipv4/tcp_output.c:3044 tcp_xmit_recovery+0x7c/0x120 net/ipv4/tcp_input.c:3558 tcp_ack+0x17b6/0x3170 net/ipv4/tcp_input.c:3717 tcp_rcv_established+0x37e/0xf50 net/ipv4/tcp_input.c:5696 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2435 release_sock+0x61/0x160 net/core/sock.c:2951 sk_stream_wait_memory+0x3d7/0x7c0 net/core/stream.c:145 tcp_sendmsg_locked+0xb47/0x1f30 net/ipv4/tcp.c:1393 tcp_sendmsg+0x39/0x60 net/ipv4/tcp.c:1434 inet_sendmsg+0x6d/0x90 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 BUG: KCSAN: data-race in __remove_hrtimer / __tcp_ack_snd_check write to 0xffff8880a3a65588 of 1 bytes by interrupt on cpu 0: __remove_hrtimer+0x52/0x130 kernel/time/hrtimer.c:991 __run_hrtimer kernel/time/hrtimer.c:1496 [inline] __hrtimer_run_queues+0x250/0x600 kernel/time/hrtimer.c:1576 hrtimer_run_softirq+0x10e/0x150 kernel/time/hrtimer.c:1593 __do_softirq+0x115/0x33f kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0xbb/0xe0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830 read to 0xffff8880a3a65588 of 1 bytes by task 22891 on cpu 1: __tcp_ack_snd_check+0x415/0x4f0 net/ipv4/tcp_input.c:5265 tcp_ack_snd_check net/ipv4/tcp_input.c:5287 [inline] tcp_rcv_established+0x750/0xf50 net/ipv4/tcp_input.c:5708 tcp_v4_do_rcv+0x381/0x4e0 net/ipv4/tcp_ipv4.c:1561 sk_backlog_rcv include/net/sock.h:945 [inline] __release_sock+0x135/0x1e0 net/core/sock.c:2435 release_sock+0x61/0x160 net/core/sock.c:2951 sk_stream_wait_memory+0x3d7/0x7c0 net/core/stream.c:145 tcp_sendmsg_locked+0xb47/0x1f30 net/ipv4/tcp.c:1393 tcp_sendmsg+0x39/0x60 net/ipv4/tcp.c:1434 inet_sendmsg+0x6d/0x90 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0x9f/0xc0 net/socket.c:657 __sys_sendto+0x21f/0x320 net/socket.c:1952 __do_sys_sendto net/socket.c:1964 [inline] __se_sys_sendto net/socket.c:1960 [inline] __x64_sys_sendto+0x89/0xb0 net/socket.c:1960 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 24652 Comm: syz-executor.3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ tglx: Added comments ] Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191106174804.74723-1-edumazet@google.com --- include/linux/hrtimer.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1b9a51a1bccb..1f98b52118f0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -456,12 +456,18 @@ extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); -/* - * Helper function to check, whether the timer is on one of the queues +/** + * hrtimer_is_queued = check, whether the timer is on one of the queues + * @timer: Timer to check + * + * Returns: True if the timer is queued, false otherwise + * + * The function can be used lockless, but it gives only a current snapshot. */ -static inline int hrtimer_is_queued(struct hrtimer *timer) +static inline bool hrtimer_is_queued(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_ENQUEUED; + /* The READ_ONCE pairs with the update functions of timer->state */ + return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* -- cgit v1.2.3 From f96c8e50152814d05a4002b8c03a80366a27afa3 Mon Sep 17 00:00:00 2001 From: Amit Kucheria Date: Mon, 21 Oct 2019 17:45:10 +0530 Subject: thermal: Remove netlink support There are no users of netlink messages for thermal inside the kernel. Remove the code and adjust the documentation. Signed-off-by: Amit Kucheria Acked-by: Viresh Kumar Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/8ff02cf62186c7a54fff325fad40a2e9ca3affa6.1571656014.git.amit.kucheria@linaro.org --- include/linux/thermal.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e45659c75920..d9111aebb97d 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -544,15 +544,4 @@ static inline void thermal_notify_framework(struct thermal_zone_device *tz, { } #endif /* CONFIG_THERMAL */ -#if defined(CONFIG_NET) && IS_ENABLED(CONFIG_THERMAL) -extern int thermal_generate_netlink_event(struct thermal_zone_device *tz, - enum events event); -#else -static inline int thermal_generate_netlink_event(struct thermal_zone_device *tz, - enum events event) -{ - return 0; -} -#endif - #endif /* __THERMAL_H__ */ -- cgit v1.2.3 From f5bf3c06730c1bd85a3c064357de433736facc5a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 30 Oct 2019 10:10:36 +0100 Subject: thermal: cpu_cooling: Remove pointless dependency on CONFIG_OF The option CONFIG_CPU_THERMAL depends on CONFIG_OF in the Kconfig. It it pointless to check if CONFIG_OF is set in the header file as this is always true if CONFIG_CPU_THERMAL is true. Remove it. Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Reviewed-by: Amit Kucheria Link: https://lore.kernel.org/r/20191030091038.678-1-daniel.lezcano@linaro.org --- include/linux/cpu_cooling.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index bae54bb7c048..72d1c9c5e538 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -47,7 +47,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) } #endif /* CONFIG_CPU_THERMAL */ -#if defined(CONFIG_THERMAL_OF) && defined(CONFIG_CPU_THERMAL) +#ifdef CONFIG_CPU_THERMAL /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @policy: cpufreq policy. @@ -60,6 +60,6 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) { return NULL; } -#endif /* defined(CONFIG_THERMAL_OF) && defined(CONFIG_CPU_THERMAL) */ +#endif /* CONFIG_CPU_THERMAL */ #endif /* __CPU_COOLING_H__ */ -- cgit v1.2.3 From 0cac7559f1b67aa29879ead6b6b6a856d963905f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 30 Oct 2019 10:10:37 +0100 Subject: thermal: cpu_cooling: Reorder the header file As the conditions are simplified and unified, it is useless to have different blocks of definitions under the same compiler condition, let's merge the blocks. There is no functional change. Signed-off-by: Daniel Lezcano Acked-by: Viresh Kumar Reviewed-by: Amit Kucheria Link: https://lore.kernel.org/r/20191030091038.678-2-daniel.lezcano@linaro.org --- include/linux/cpu_cooling.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 72d1c9c5e538..b74732535e4b 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -33,6 +33,13 @@ cpufreq_cooling_register(struct cpufreq_policy *policy); */ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev); +/** + * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. + * @policy: cpufreq policy. + */ +struct thermal_cooling_device * +of_cpufreq_cooling_register(struct cpufreq_policy *policy); + #else /* !CONFIG_CPU_THERMAL */ static inline struct thermal_cooling_device * cpufreq_cooling_register(struct cpufreq_policy *policy) @@ -45,16 +52,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { return; } -#endif /* CONFIG_CPU_THERMAL */ -#ifdef CONFIG_CPU_THERMAL -/** - * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. - * @policy: cpufreq policy. - */ -struct thermal_cooling_device * -of_cpufreq_cooling_register(struct cpufreq_policy *policy); -#else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct cpufreq_policy *policy) { -- cgit v1.2.3 From 27a47e422ef3cb09f6a428e2b05eb79079506875 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 30 Oct 2019 15:14:49 +0000 Subject: PM / EM: Declare EM data types unconditionally The structs representing capacity states and performance domains of an Energy Model are currently only defined for CONFIG_ENERGY_MODEL=y. That makes it hard for code outside PM_EM to manipulate those structures without a lot of ifdefery or stubbed accessors. So, move the declaration of the two structs outside of the CONFIG_ENERGY_MODEL ifdef. The client code (e.g. EAS or thermal) always checks the return of em_cpu_get() before using it, so the exising code is still safe to use as-is. Reported-by: kbuild test robot Signed-off-by: Quentin Perret Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20191030151451.7961-3-qperret@google.com --- include/linux/energy_model.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 73f8c3cb9588..d249b88a4d5a 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -9,7 +9,6 @@ #include #include -#ifdef CONFIG_ENERGY_MODEL /** * em_cap_state - Capacity state of a performance domain * @frequency: The CPU frequency in KHz, for consistency with CPUFreq @@ -40,6 +39,7 @@ struct em_perf_domain { unsigned long cpus[0]; }; +#ifdef CONFIG_ENERGY_MODEL #define EM_CPU_MAX_POWER 0xFFFF struct em_data_callback { @@ -160,7 +160,6 @@ static inline int em_pd_nr_cap_states(struct em_perf_domain *pd) } #else -struct em_perf_domain {}; struct em_data_callback {}; #define EM_DATA_CB(_active_power_cb) { } -- cgit v1.2.3 From 9208b1e77d6e8e9776f34f46ef4079ecac9c3c25 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Wed, 6 Nov 2019 10:51:47 +0200 Subject: gpio: Add definition for GPIO direction At least for me it is difficult to remember the meaning of GPIO direction values. Define GPIO_LINE_DIRECTION_IN and GPIO_LINE_DIRECTION_OUT so that occasional GPIO contributors would not need to always check the meaning of hard coded values 1 and 0. Signed-off-by: Matti Vaittinen Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 5dd9c982e2cb..cc9ade4552d9 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -22,6 +22,9 @@ enum gpio_lookup_flags; struct gpio_chip; +#define GPIO_LINE_DIRECTION_IN 1 +#define GPIO_LINE_DIRECTION_OUT 0 + /** * struct gpio_irq_chip - GPIO interrupt controller */ -- cgit v1.2.3 From 0d95981438c3bdb53cc99b0fb656d24d7a80e1f3 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Wed, 6 Nov 2019 08:06:13 +0100 Subject: x86: efi/random: Invoke EFI_RNG_PROTOCOL to seed the UEFI RNG table Invoke the EFI_RNG_PROTOCOL protocol in the context of the x86 EFI stub, same as is done on arm/arm64 since commit 568bc4e87033 ("efi/arm*/libstub: Invoke EFI_RNG_PROTOCOL to seed the UEFI RNG table"). Within the stub, a Linux-specific RNG seed UEFI config table will be seeded. The EFI routines in the core kernel will pick that up later, yet still early during boot, to seed the kernel entropy pool. If CONFIG_RANDOM_TRUST_BOOTLOADER, entropy is credited for this seed. Signed-off-by: Dominik Brodowski Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d87acf62958e..028efa7a9f3b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1645,6 +1645,8 @@ static inline void efi_enable_reset_attack_mitigation(efi_system_table_t *sys_table_arg) { } #endif +efi_status_t efi_random_get_seed(efi_system_table_t *sys_table_arg); + void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table); /* -- cgit v1.2.3 From dd5ddd3c7a8c7ac382a82d15757f0ca3ab2b2dbc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 24 Oct 2019 16:57:39 +0100 Subject: iommu/io-pgtable-arm: Rename IOMMU_QCOM_SYS_CACHE and improve doc The 'IOMMU_QCOM_SYS_CACHE' IOMMU protection flag is exposed to all users of the IOMMU API. Despite its name, the idea behind it isn't especially tied to Qualcomm implementations and could conceivably be used by other systems. Rename it to 'IOMMU_SYS_CACHE_ONLY' and update the comment to describe a bit better the idea behind it. Cc: Robin Murphy Cc: "Isaac J. Manjarres" Signed-off-by: Will Deacon --- include/linux/iommu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 29bac5345563..a86bd21d08a9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -31,11 +31,11 @@ */ #define IOMMU_PRIV (1 << 5) /* - * Non-coherent masters on few Qualcomm SoCs can use this page protection flag - * to set correct cacheability attributes to use an outer level of cache - - * last level cache, aka system cache. + * Non-coherent masters can use this page protection flag to set cacheable + * memory attributes for only a transparent outer level of cache, also known as + * the last-level or system cache. */ -#define IOMMU_QCOM_SYS_CACHE (1 << 6) +#define IOMMU_SYS_CACHE_ONLY (1 << 6) struct iommu_ops; struct iommu_group; -- cgit v1.2.3 From 99c4f70df3a6446c56ca817c2d0f9c12d85d4e7c Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Nov 2019 18:31:24 +0100 Subject: regulator: ab8500: Remove AB8505 USB regulator The USB regulator was removed for AB8500 in commit 41a06aa738ad ("regulator: ab8500: Remove USB regulator"). It was then added for AB8505 in commit 547f384f33db ("regulator: ab8500: add support for ab8505"). However, there was never an entry added for it in ab8505_regulator_match. This causes all regulators after it to be initialized with the wrong device tree data, eventually leading to an out-of-bounds array read. Given that it is not used anywhere in the kernel, it seems likely that similar arguments against supporting it exist for AB8505 (it is controlled by hardware). Therefore, simply remove it like for AB8500 instead of adding an entry in ab8505_regulator_match. Fixes: 547f384f33db ("regulator: ab8500: add support for ab8505") Cc: Linus Walleij Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20191106173125.14496-1-stephan@gerhold.net Signed-off-by: Mark Brown --- include/linux/regulator/ab8500.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 7cf8f797e13a..505e94a6e3e8 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -37,7 +37,6 @@ enum ab8505_regulator_id { AB8505_LDO_AUX6, AB8505_LDO_INTCORE, AB8505_LDO_ADC, - AB8505_LDO_USB, AB8505_LDO_AUDIO, AB8505_LDO_ANAMIC1, AB8505_LDO_ANAMIC2, -- cgit v1.2.3 From 458ea3ad033fc86e291712ce50cbe60c3428cf30 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Nov 2019 18:31:25 +0100 Subject: regulator: ab8500: Remove SYSCLKREQ from enum ab8505_regulator_id Those regulators are not actually supported by the AB8500 regulator driver. There is no ab8500_regulator_info for them and no entry in ab8505_regulator_match. As such, they cannot be registered successfully, and looking them up in ab8505_regulator_match causes an out-of-bounds array read. Fixes: 547f384f33db ("regulator: ab8500: add support for ab8505") Cc: Linus Walleij Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20191106173125.14496-2-stephan@gerhold.net Signed-off-by: Mark Brown --- include/linux/regulator/ab8500.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index 505e94a6e3e8..3ab1ddf151a2 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -42,8 +42,6 @@ enum ab8505_regulator_id { AB8505_LDO_ANAMIC2, AB8505_LDO_AUX8, AB8505_LDO_ANA, - AB8505_SYSCLKREQ_2, - AB8505_SYSCLKREQ_4, AB8505_NUM_REGULATORS, }; -- cgit v1.2.3 From 6c1b1da58f8c7a697a88ae35afeba196fc7b701e Mon Sep 17 00:00:00 2001 From: Ajay Joshi Date: Sun, 27 Oct 2019 23:05:45 +0900 Subject: block: add zone open, close and finish operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zoned block devices (ZBC and ZAC devices) allow an explicit control over the condition (state) of zones. The operations allowed are: * Open a zone: Transition to open condition to indicate that a zone will actively be written * Close a zone: Transition to closed condition to release the drive resources used for writing to a zone * Finish a zone: Transition an open or closed zone to the full condition to prevent write operations To enable this control for in-kernel zoned block device users, define the new request operations REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH as well as the generic function blkdev_zone_mgmt() for submitting these operations on a range of zones. This results in blkdev_reset_zones() removal and replacement with this new zone magement function. Users of blkdev_reset_zones() (f2fs and dm-zoned) are updated accordingly. Contains contributions from Matias Bjorling, Hans Holmberg, Dmitry Fomichev, Keith Busch, Damien Le Moal and Christoph Hellwig. Reviewed-by: Javier González Reviewed-by: Christoph Hellwig Signed-off-by: Ajay Joshi Signed-off-by: Matias Bjorling Signed-off-by: Hans Holmberg Signed-off-by: Dmitry Fomichev Signed-off-by: Keith Busch Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 25 +++++++++++++++++++++++++ include/linux/blkdev.h | 5 +++-- 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1e7eeec16458..23a2fd534817 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -290,6 +290,12 @@ enum req_opf { REQ_OP_ZONE_RESET_ALL = 8, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, + /* Open a zone */ + REQ_OP_ZONE_OPEN = 10, + /* Close a zone */ + REQ_OP_ZONE_CLOSE = 11, + /* Transition a zone to full */ + REQ_OP_ZONE_FINISH = 12, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, @@ -417,6 +423,25 @@ static inline bool op_is_discard(unsigned int op) return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } +/* + * Check if a bio or request operation is a zone management operation, with + * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case + * due to its different handling in the block layer and device response in + * case of command failure. + */ +static inline bool op_is_zone_mgmt(enum req_opf op) +{ + switch (op & REQ_OP_MASK) { + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + return true; + default: + return false; + } +} + static inline int op_stat_group(unsigned int op) { if (op_is_discard(op)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4051acb92a1..9cfafff86f66 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,8 +360,9 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev); extern int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct blk_zone *zones, unsigned int *nr_zones); -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, - sector_t nr_sectors, gfp_t gfp_mask); +extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, + sector_t sectors, sector_t nr_sectors, + gfp_t gfp_mask); extern int blk_revalidate_disk_zones(struct gendisk *disk); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, -- cgit v1.2.3 From e876df1fe0ad1b191284ee6ed2db7960bd322d00 Mon Sep 17 00:00:00 2001 From: Ajay Joshi Date: Sun, 27 Oct 2019 23:05:46 +0900 Subject: block: add zone open, close and finish ioctl support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce three new ioctl commands BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE to allow applications to control the condition of zones on a zoned block device through the execution of the REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH operations. Contains contributions from Matias Bjorling, Hans Holmberg, Dmitry Fomichev, Keith Busch, Damien Le Moal and Christoph Hellwig. Reviewed-by: Javier González Reviewed-by: Christoph Hellwig Signed-off-by: Ajay Joshi Signed-off-by: Matias Bjorling Signed-off-by: Hans Holmberg Signed-off-by: Dmitry Fomichev Signed-off-by: Keith Busch Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cfafff86f66..6a4f7abbdcf7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -367,8 +367,8 @@ extern int blk_revalidate_disk_zones(struct gendisk *disk); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); +extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ @@ -389,9 +389,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev, return -ENOTTY; } -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, - fmode_t mode, unsigned int cmd, - unsigned long arg) +static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) { return -ENOTTY; } -- cgit v1.2.3 From e2854a1054ab171a2c5cad6e9b7f0c580bab409d Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 4 Nov 2019 17:09:37 +0800 Subject: moduleparam: fix parameter description mismatch The first parameter of module_param is @name, but @value is used in description. Fix it. Fixes: 546970bc6afc ("param: add kerneldoc to moduleparam.h") Signed-off-by: Zhenzhong Duan Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 5ba250d9172a..e5c3e23919b8 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -100,11 +100,11 @@ struct kparam_array /** * module_param - typesafe helper for a module/cmdline parameter - * @value: the variable to alter, and exposed parameter name. + * @name: the variable to alter, and exposed parameter name. * @type: the type of the parameter * @perm: visibility in sysfs. * - * @value becomes the module parameter, or (prefixed by KBUILD_MODNAME and a + * @name becomes the module parameter, or (prefixed by KBUILD_MODNAME and a * ".") the kernel commandline parameter. Note that - is changed to _, so * the user can use "foo-bar=1" even for variable "foo_bar". * -- cgit v1.2.3 From fe3e5e65c06edb1c56e64e567f053e243142001f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:00 -0800 Subject: efi: Enumerate EFI_MEMORY_SP UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the interpretation of the EFI Memory Types as "reserved for a specific purpose". The intent of this bit is to allow the OS to identify precious or scarce memory resources and optionally manage it separately from EfiConventionalMemory. As defined older OSes that do not know about this attribute are permitted to ignore it and the memory will be handled according to the OS default policy for the given memory type. In other words, this "specific purpose" hint is deliberately weaker than EfiReservedMemoryType in that the system continues to operate if the OS takes no action on the attribute. The risk of taking no action is potentially unwanted / unmovable kernel allocations from the designated resource that prevent the full realization of the "specific purpose". For example, consider a system with a high-bandwidth memory pool. Older kernels are permitted to boot and consume that memory as conventional "System-RAM" newer kernels may arrange for that memory to be set aside (soft reserved) by the system administrator for a dedicated high-bandwidth memory aware application to consume. Specifically, this mechanism allows for the elimination of scenarios where platform firmware tries to game OS policy by lying about ACPI SLIT values, i.e. claiming that a precious memory resource has a high distance to trigger the OS to avoid it by default. This reservation hint allows platform-firmware to instead tell the truth about performance characteristics by indicate to OS memory management to put immovable allocations elsewhere. Implement simple detection of the bit for EFI memory table dumps and save the kernel policy for a follow-on change. Reviewed-by: Ard Biesheuvel Reviewed-by: Dave Hansen Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d87acf62958e..78c75992b313 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -112,6 +112,7 @@ typedef struct { #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ #define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ +#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 -- cgit v1.2.3 From 6950e31b35fdf4588cbbdec1813091bb02cf8871 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:05 -0800 Subject: x86/efi: Push EFI_MEMMAP check into leaf routines In preparation for adding another EFI_MEMMAP dependent call that needs to occur before e820__memblock_setup() fixup the existing efi calls to check for EFI_MEMMAP internally. This ends up being cleaner than the alternative of checking EFI_MEMMAP multiple times in setup_arch(). Reviewed-by: Dave Hansen Reviewed-by: Ard Biesheuvel Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/efi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 78c75992b313..44c85b559e15 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1045,7 +1045,6 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos extern efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, bool nonblocking); -extern void efi_find_mirror(void); #else static inline efi_status_t efi_query_variable_store(u32 attributes, -- cgit v1.2.3 From b617c5266eedbef2ccbb90931bb9175faa4ae0bc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:11 -0800 Subject: efi: Common enable/disable infrastructure for EFI soft reservation UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the interpretation of the EFI Memory Types as "reserved for a specific purpose". The proposed Linux behavior for specific purpose memory is that it is reserved for direct-access (device-dax) by default and not available for any kernel usage, not even as an OOM fallback. Later, through udev scripts or another init mechanism, these device-dax claimed ranges can be reconfigured and hot-added to the available System-RAM with a unique node identifier. This device-dax management scheme implements "soft" in the "soft reserved" designation by allowing some or all of the reservation to be recovered as typical memory. This policy can be disabled at compile-time with CONFIG_EFI_SOFT_RESERVE=n, or runtime with efi=nosoftreserve. As for this patch, define the common helpers to determine if the EFI_MEMORY_SP attribute should be honored. The determination needs to be made early to prevent the kernel from being loaded into soft-reserved memory, or otherwise allowing early allocations to land there. Follow-on changes are needed per architecture to leverage these helpers in their respective mem-init paths. Reviewed-by: Ard Biesheuvel Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/efi.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 44c85b559e15..88654910ce29 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1202,6 +1202,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_DBG 8 /* Print additional debug info at runtime */ #define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ +#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ #ifdef CONFIG_EFI /* @@ -1212,6 +1213,14 @@ static inline bool efi_enabled(int feature) return test_bit(feature, &efi.flags) != 0; } extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); + +bool __pure __efi_soft_reserve_enabled(void); + +static inline bool __pure efi_soft_reserve_enabled(void) +{ + return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) + && __efi_soft_reserve_enabled(); +} #else static inline bool efi_enabled(int feature) { @@ -1225,6 +1234,11 @@ efi_capsule_pending(int *reset_type) { return false; } + +static inline bool efi_soft_reserve_enabled(void) +{ + return false; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3 From 262b45ae3ab4bf8e2caf1fcfd0d8307897519630 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:16 -0800 Subject: x86/efi: EFI soft reservation to E820 enumeration UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the interpretation of the EFI Memory Types as "reserved for a specific purpose". The proposed Linux behavior for specific purpose memory is that it is reserved for direct-access (device-dax) by default and not available for any kernel usage, not even as an OOM fallback. Later, through udev scripts or another init mechanism, these device-dax claimed ranges can be reconfigured and hot-added to the available System-RAM with a unique node identifier. This device-dax management scheme implements "soft" in the "soft reserved" designation by allowing some or all of the reservation to be recovered as typical memory. This policy can be disabled at compile-time with CONFIG_EFI_SOFT_RESERVE=n, or runtime with efi=nosoftreserve. This patch introduces 2 new concepts at once given the entanglement between early boot enumeration relative to memory that can optionally be reserved from the kernel page allocator by default. The new concepts are: - E820_TYPE_SOFT_RESERVED: Upon detecting the EFI_MEMORY_SP attribute on EFI_CONVENTIONAL memory, update the E820 map with this new type. Only perform this classification if the CONFIG_EFI_SOFT_RESERVE=y policy is enabled, otherwise treat it as typical ram. - IORES_DESC_SOFT_RESERVED: Add a new I/O resource descriptor for a device driver to search iomem resources for application specific memory. Teach the iomem code to identify such ranges as "Soft Reserved". Note that the comment for do_add_efi_memmap() needed refreshing since it seemed to imply that the efi map might overflow the e820 table, but that is not an issue as of commit 7b6e4ba3cb1f "x86/boot/e820: Clean up the E820_X_MAX definition" that removed the 128 entry limit for e820__range_add(). A follow-on change integrates parsing of the ACPI HMAT to identify the node and sub-range boundaries of EFI_MEMORY_SP designated memory. For now, just identify and reserve memory of this type. Acked-by: Ard Biesheuvel Reported-by: kbuild test robot Reviewed-by: Dave Hansen Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 7bddddfc76d6..a9b9170b5dd2 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -134,6 +134,7 @@ enum { IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, IORES_DESC_RESERVED = 7, + IORES_DESC_SOFT_RESERVED = 8, }; /* -- cgit v1.2.3 From 33dd70752cd76f4d883a165a674f13121a4155ed Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:31 -0800 Subject: lib: Uplevel the pmem "region" ida to a global allocator In preparation for handling platform differentiated memory types beyond persistent memory, uplevel the "region" identifier to a global number space. This enables a device-dax instance to be registered to any memory type with guaranteed unique names. Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/memregion.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 include/linux/memregion.h (limited to 'include/linux') diff --git a/include/linux/memregion.h b/include/linux/memregion.h new file mode 100644 index 000000000000..7de7c0a1444e --- /dev/null +++ b/include/linux/memregion.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MEMREGION_H_ +#define _MEMREGION_H_ +#include +#include + +#ifdef CONFIG_MEMREGION +int memregion_alloc(gfp_t gfp); +void memregion_free(int id); +#else +static inline int memregion_alloc(gfp_t gfp) +{ + return -ENOMEM; +} +void memregion_free(int id) +{ +} +#endif +#endif /* _MEMREGION_H_ */ -- cgit v1.2.3 From a6c7f4c6aea5f4ca6056b06cec7ebd79f8c23e33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Nov 2019 17:43:43 -0800 Subject: device-dax: Add a driver for "hmem" devices Platform firmware like EFI/ACPI may publish "hmem" platform devices. Such a device is a performance differentiated memory range likely reserved for an application specific use case. The driver gives access to 100% of the capacity via a device-dax mmap instance by default. However, if over-subscription and other kernel memory management is desired the resulting dax device can be assigned to the core-mm via the kmem driver. This consumes "hmem" devices the producer of "hmem" devices is saved for a follow-on patch so that it can reference the new CONFIG_DEV_DAX_HMEM symbol to gate performing the enumeration work. Reported-by: kbuild test robot Reviewed-by: Dave Hansen Signed-off-by: Dan Williams Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/memregion.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memregion.h b/include/linux/memregion.h index 7de7c0a1444e..e11595256cac 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -4,6 +4,10 @@ #include #include +struct memregion_info { + int target_node; +}; + #ifdef CONFIG_MEMREGION int memregion_alloc(gfp_t gfp); void memregion_free(int id); -- cgit v1.2.3 From acaade1af3587132e7ea585f470a95261e14f60c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Oct 2019 09:57:09 +0100 Subject: dma-direct: remove __dma_direct_free_pages We can just call dma_free_contiguous directly instead of wrapping it. Signed-off-by: Christoph Hellwig Reviewed-by: Max Filippov --- include/linux/dma-direct.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index adf993a3bd58..dec3b3bb121d 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -68,6 +68,5 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page); int dma_direct_supported(struct device *dev, u64 mask); #endif /* _LINUX_DMA_DIRECT_H */ -- cgit v1.2.3 From 4e1003aa56a7d60ddb048e43a7a51368fcfe36af Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Oct 2019 09:57:32 +0100 Subject: dma-direct: remove the dma_handle argument to __dma_direct_alloc_pages The argument isn't used anywhere, so stop passing it. Signed-off-by: Christoph Hellwig Reviewed-by: Max Filippov --- include/linux/dma-direct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index dec3b3bb121d..ff3d5edc44b9 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -67,6 +67,6 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); + gfp_t gfp, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); #endif /* _LINUX_DMA_DIRECT_H */ -- cgit v1.2.3 From 8a80d5d6638b7d58480a83aef49d587de63d4cbb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 Nov 2019 11:18:02 -0800 Subject: blk-cgroup: remove now unused blkg_print_stat_{bytes|ios}_recursive() These don't have users anymore. Remove them. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index bed9e43f9426..914ce55fa8c2 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -220,11 +220,6 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat_sample *rwstat); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); -int blkg_print_stat_bytes(struct seq_file *sf, void *v); -int blkg_print_stat_ios(struct seq_file *sf, void *v); -int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); -int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); - void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off, struct blkg_rwstat_sample *sum); -- cgit v1.2.3 From f73316482977ac401ac37245c9df48079d4e11f3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 Nov 2019 11:18:03 -0800 Subject: blk-cgroup: reimplement basic IO stats using cgroup rstat blk-cgroup has been using blkg_rwstat to track basic IO stats. Unfortunately, reading recursive stats scales badly as itinvolves walking all descendants. On systems with a huge number of cgroups (dead or alive), this can lead to substantial CPU cost when reading IO stats. This patch reimplements basic IO stats using cgroup rstat which uses more memory but makes recursive stat reading O(# descendants which have been active since last reading) instead of O(# descendants). * blk-cgroup core no longer uses sync/async stats. Introduce new stat enums - BLKG_IOSTAT_{READ|WRITE|DISCARD}. * Add blkg_iostat[_set] which encapsulates byte and io stats, last values for propagation delta calculation and u64_stats_sync for correctness on 32bit archs. * Update the new percpu stat counters directly and implement blkcg_rstat_flush() to implement propagation. * blkg_print_stat() can now bring the stats up to date by calling cgroup_rstat_flush() and print them instead of directly summing up all descendants. * It now allocates 96 bytes per cpu. It used to be 40 bytes. Signed-off-by: Tejun Heo Cc: Dan Schatzberg Cc: Daniel Xu Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 48 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 914ce55fa8c2..867ab391e409 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -15,7 +15,9 @@ */ #include +#include #include +#include #include #include #include @@ -31,6 +33,14 @@ #ifdef CONFIG_BLK_CGROUP +enum blkg_iostat_type { + BLKG_IOSTAT_READ, + BLKG_IOSTAT_WRITE, + BLKG_IOSTAT_DISCARD, + + BLKG_IOSTAT_NR, +}; + enum blkg_rwstat_type { BLKG_RWSTAT_READ, BLKG_RWSTAT_WRITE, @@ -61,6 +71,17 @@ struct blkcg { #endif }; +struct blkg_iostat { + u64 bytes[BLKG_IOSTAT_NR]; + u64 ios[BLKG_IOSTAT_NR]; +}; + +struct blkg_iostat_set { + struct u64_stats_sync sync; + struct blkg_iostat cur; + struct blkg_iostat last; +}; + /* * blkg_[rw]stat->aux_cnt is excluded for local stats but included for * recursive. Used to carry stats of dead children. @@ -127,8 +148,8 @@ struct blkcg_gq { /* is this blkg online? protected by both blkcg and q locks */ bool online; - struct blkg_rwstat stat_bytes; - struct blkg_rwstat stat_ios; + struct blkg_iostat_set __percpu *iostat_cpu; + struct blkg_iostat_set iostat; struct blkg_policy_data *pd[BLKCG_MAX_POLS]; @@ -740,15 +761,32 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { + struct blkg_iostat_set *bis; + int rwd, cpu; + + if (op_is_discard(bio->bi_opf)) + rwd = BLKG_IOSTAT_DISCARD; + else if (op_is_write(bio->bi_opf)) + rwd = BLKG_IOSTAT_WRITE; + else + rwd = BLKG_IOSTAT_READ; + + cpu = get_cpu(); + bis = per_cpu_ptr(blkg->iostat_cpu, cpu); + u64_stats_update_begin(&bis->sync); + /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the * size of the bio. */ if (!bio_flagged(bio, BIO_QUEUE_ENTERED)) - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, - bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); + bis->cur.bytes[rwd] += bio->bi_iter.bi_size; + bis->cur.ios[rwd]++; + + u64_stats_update_end(&bis->sync); + cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu); + put_cpu(); } blkcg_bio_issue_init(bio); -- cgit v1.2.3 From 1d156646e0d8ec390e5d5ac288137df02d4207be Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 7 Nov 2019 11:18:04 -0800 Subject: blk-cgroup: separate out blkg_rwstat under CONFIG_BLK_CGROUP_RWSTAT blkg_rwstat is now only used by bfq-iosched and blk-throtl when on cgroup1. Let's move it into its own files and gate it behind a config option. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 159 --------------------------------------------- 1 file changed, 159 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 867ab391e409..48a66738143d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -41,17 +41,6 @@ enum blkg_iostat_type { BLKG_IOSTAT_NR, }; -enum blkg_rwstat_type { - BLKG_RWSTAT_READ, - BLKG_RWSTAT_WRITE, - BLKG_RWSTAT_SYNC, - BLKG_RWSTAT_ASYNC, - BLKG_RWSTAT_DISCARD, - - BLKG_RWSTAT_NR, - BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, -}; - struct blkcg_gq; struct blkcg { @@ -82,19 +71,6 @@ struct blkg_iostat_set { struct blkg_iostat last; }; -/* - * blkg_[rw]stat->aux_cnt is excluded for local stats but included for - * recursive. Used to carry stats of dead children. - */ -struct blkg_rwstat { - struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; - atomic64_t aux_cnt[BLKG_RWSTAT_NR]; -}; - -struct blkg_rwstat_sample { - u64 cnt[BLKG_RWSTAT_NR]; -}; - /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track @@ -223,13 +199,6 @@ int blkcg_activate_policy(struct request_queue *q, void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); -static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, - unsigned int idx) -{ - return atomic64_read(&rwstat->aux_cnt[idx]) + - percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); -} - const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, @@ -237,12 +206,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, const struct blkcg_policy *pol, int data, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); -u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat_sample *rwstat); -u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - int off); -void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat_sample *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -594,128 +557,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) -{ - int i, ret; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) { - ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp); - if (ret) { - while (--i >= 0) - percpu_counter_destroy(&rwstat->cpu_cnt[i]); - return ret; - } - atomic64_set(&rwstat->aux_cnt[i], 0); - } - return 0; -} - -static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) -{ - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - percpu_counter_destroy(&rwstat->cpu_cnt[i]); -} - -/** - * blkg_rwstat_add - add a value to a blkg_rwstat - * @rwstat: target blkg_rwstat - * @op: REQ_OP and flags - * @val: value to add - * - * Add @val to @rwstat. The counters are chosen according to @rw. The - * caller is responsible for synchronizing calls to this function. - */ -static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - unsigned int op, uint64_t val) -{ - struct percpu_counter *cnt; - - if (op_is_discard(op)) - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; - else if (op_is_write(op)) - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; - else - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; - - percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); - - if (op_is_sync(op)) - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; - else - cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; - - percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); -} - -/** - * blkg_rwstat_read - read the current values of a blkg_rwstat - * @rwstat: blkg_rwstat to read - * - * Read the current snapshot of @rwstat and return it in the aux counts. - */ -static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, - struct blkg_rwstat_sample *result) -{ - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - result->cnt[i] = - percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); -} - -/** - * blkg_rwstat_total - read the total count of a blkg_rwstat - * @rwstat: blkg_rwstat to read - * - * Return the total count of @rwstat regardless of the IO direction. This - * function can be called without synchronization and takes care of u64 - * atomicity. - */ -static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) -{ - struct blkg_rwstat_sample tmp = { }; - - blkg_rwstat_read(rwstat, &tmp); - return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; -} - -/** - * blkg_rwstat_reset - reset a blkg_rwstat - * @rwstat: blkg_rwstat to reset - */ -static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) -{ - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) { - percpu_counter_set(&rwstat->cpu_cnt[i], 0); - atomic64_set(&rwstat->aux_cnt[i], 0); - } -} - -/** - * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count - * @to: the destination blkg_rwstat - * @from: the source - * - * Add @from's count including the aux one to @to's aux count. - */ -static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, - struct blkg_rwstat *from) -{ - u64 sum[BLKG_RWSTAT_NR]; - int i; - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]); - - for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]), - &to->aux_cnt[i]); -} - #ifdef CONFIG_BLK_DEV_THROTTLING extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, struct bio *bio); -- cgit v1.2.3 From dbbf98392af6e2cf3673908c1388ca1ae915c8bb Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Fri, 6 Sep 2019 15:06:41 +0000 Subject: memory: atmel-ebi: move NUM_CS definition inside EBI driver The total number of EBI CS lines is described by the EBI controller and not by the Matrix. Move the definition for the number of CS inside EBI driver. Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20190906150632.19039-1-tudor.ambarus@microchip.com Signed-off-by: Alexandre Belloni --- include/linux/mfd/syscon/atmel-matrix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/atmel-matrix.h b/include/linux/mfd/syscon/atmel-matrix.h index f61cd127a852..20c25665216a 100644 --- a/include/linux/mfd/syscon/atmel-matrix.h +++ b/include/linux/mfd/syscon/atmel-matrix.h @@ -106,7 +106,6 @@ #define AT91_MATRIX_DDR_IOSR BIT(18) #define AT91_MATRIX_NFD0_SELECT BIT(24) #define AT91_MATRIX_DDR_MP_EN BIT(25) -#define AT91_MATRIX_EBI_NUM_CS 8 #define AT91_MATRIX_USBPUCR_PUON BIT(30) -- cgit v1.2.3 From de7d5084d82794a8e83afb994fcb07f82da3cd7b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 16:27:14 -0800 Subject: net: provide dev_lstats_read() helper Many network drivers use hand-coded implementation of the same thing, let's factorize things so that u64_stats_t adoption is done once. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1f140a6b66df..75561992c31f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2401,6 +2401,8 @@ struct pcpu_lstats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes); + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ -- cgit v1.2.3 From dd5382a08157756510aa8d7269c662eccde775cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 16:27:15 -0800 Subject: net: provide dev_lstats_add() helper Many network drivers need it and hand-coded the same function. In order to ease u64_stats_t adoption, it is time to factorize. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 75561992c31f..461a36220cf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2403,6 +2403,16 @@ struct pcpu_lstats { void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes); +static inline void dev_lstats_add(struct net_device *dev, unsigned int len) +{ + struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); + + u64_stats_update_begin(&lstats->syncp); + lstats->bytes += len; + lstats->packets++; + u64_stats_update_end(&lstats->syncp); +} + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ -- cgit v1.2.3 From 316580b69d0a7aeeee5063af47438b626bc47cbd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 16:27:20 -0800 Subject: u64_stats: provide u64_stats_t type On 64bit arches, struct u64_stats_sync is empty and provides no help against load/store tearing. Using READ_ONCE()/WRITE_ONCE() would be needed. But the update side would be slightly more expensive. local64_t was defined so that we could use regular adds in a manner which is atomic wrt IRQs. However the u64_stats infra means we do not have to use local64_t on 32bit arches since the syncp provides the needed protection. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/u64_stats_sync.h | 51 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index a27604f99ed0..9de5c10293f5 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -40,8 +40,8 @@ * spin_lock_bh(...) or other synchronization to get exclusive access * ... * u64_stats_update_begin(&stats->syncp); - * stats->bytes64 += len; // non atomic operation - * stats->packets64++; // non atomic operation + * u64_stats_add(&stats->bytes64, len); // non atomic operation + * u64_stats_inc(&stats->packets64); // non atomic operation * u64_stats_update_end(&stats->syncp); * * While a consumer (reader) should use following template to get consistent @@ -52,8 +52,8 @@ * * do { * start = u64_stats_fetch_begin(&stats->syncp); - * tbytes = stats->bytes64; // non atomic operation - * tpackets = stats->packets64; // non atomic operation + * tbytes = u64_stats_read(&stats->bytes64); // non atomic operation + * tpackets = u64_stats_read(&stats->packets64); // non atomic operation * } while (u64_stats_fetch_retry(&stats->syncp, start)); * * @@ -68,6 +68,49 @@ struct u64_stats_sync { #endif }; +#if BITS_PER_LONG == 64 +#include + +typedef struct { + local64_t v; +} u64_stats_t ; + +static inline u64 u64_stats_read(const u64_stats_t *p) +{ + return local64_read(&p->v); +} + +static inline void u64_stats_add(u64_stats_t *p, unsigned long val) +{ + local64_add(val, &p->v); +} + +static inline void u64_stats_inc(u64_stats_t *p) +{ + local64_inc(&p->v); +} + +#else + +typedef struct { + u64 v; +} u64_stats_t; + +static inline u64 u64_stats_read(const u64_stats_t *p) +{ + return p->v; +} + +static inline void u64_stats_add(u64_stats_t *p, unsigned long val) +{ + p->v += val; +} + +static inline void u64_stats_inc(u64_stats_t *p) +{ + p->v++; +} +#endif static inline void u64_stats_init(struct u64_stats_sync *syncp) { -- cgit v1.2.3 From fd2f4737870eb866537fbbffa2b59414b9b0c0a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 16:27:22 -0800 Subject: net: use u64_stats_t in struct pcpu_lstats In order to fix the data-race found by KCSAN, we can use the new u64_stats_t type and its accessors instead of plain u64 fields. This will still generate optimal code for both 32 and 64 bit platforms. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 461a36220cf4..f857f01234f7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2396,8 +2396,8 @@ struct pcpu_sw_netstats { } __aligned(4 * sizeof(u64)); struct pcpu_lstats { - u64 packets; - u64 bytes; + u64_stats_t packets; + u64_stats_t bytes; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -2408,8 +2408,8 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); u64_stats_update_begin(&lstats->syncp); - lstats->bytes += len; - lstats->packets++; + u64_stats_add(&lstats->bytes, len); + u64_stats_inc(&lstats->packets); u64_stats_update_end(&lstats->syncp); } -- cgit v1.2.3 From f8cc62ca3e660ae3fdaee533b1d554297cd2ae82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Nov 2019 18:49:43 -0800 Subject: net: add a READ_ONCE() in skb_peek_tail() skb_peek_tail() can be used without protection of a lock, as spotted by KCSAN [1] In order to avoid load-stearing, add a READ_ONCE() Note that the corresponding WRITE_ONCE() are already there. [1] BUG: KCSAN: data-race in sk_wait_data / skb_queue_tail read to 0xffff8880b36a4118 of 8 bytes by task 20426 on cpu 1: skb_peek_tail include/linux/skbuff.h:1784 [inline] sk_wait_data+0x15b/0x250 net/core/sock.c:2477 kcm_wait_data+0x112/0x1f0 net/kcm/kcmsock.c:1103 kcm_recvmsg+0xac/0x320 net/kcm/kcmsock.c:1130 sock_recvmsg_nosec net/socket.c:871 [inline] sock_recvmsg net/socket.c:889 [inline] sock_recvmsg+0x92/0xb0 net/socket.c:885 ___sys_recvmsg+0x1a0/0x3e0 net/socket.c:2480 do_recvmmsg+0x19a/0x5c0 net/socket.c:2601 __sys_recvmmsg+0x1ef/0x200 net/socket.c:2680 __do_sys_recvmmsg net/socket.c:2703 [inline] __se_sys_recvmmsg net/socket.c:2696 [inline] __x64_sys_recvmmsg+0x89/0xb0 net/socket.c:2696 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x44/0xa9 write to 0xffff8880b36a4118 of 8 bytes by task 451 on cpu 0: __skb_insert include/linux/skbuff.h:1852 [inline] __skb_queue_before include/linux/skbuff.h:1958 [inline] __skb_queue_tail include/linux/skbuff.h:1991 [inline] skb_queue_tail+0x7e/0xc0 net/core/skbuff.c:3145 kcm_queue_rcv_skb+0x202/0x310 net/kcm/kcmsock.c:206 kcm_rcv_strparser+0x74/0x4b0 net/kcm/kcmsock.c:370 __strp_recv+0x348/0xf50 net/strparser/strparser.c:309 strp_recv+0x84/0xa0 net/strparser/strparser.c:343 tcp_read_sock+0x174/0x5c0 net/ipv4/tcp.c:1639 strp_read_sock+0xd4/0x140 net/strparser/strparser.c:366 do_strp_work net/strparser/strparser.c:414 [inline] strp_work+0x9a/0xe0 net/strparser/strparser.c:423 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269 worker_thread+0xa0/0x800 kernel/workqueue.c:2415 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 451 Comm: kworker/u4:3 Not tainted 5.4.0-rc3+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: kstrp strp_work Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 53238ac725a3..dfe02b658829 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1795,7 +1795,7 @@ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, */ static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { - struct sk_buff *skb = list_->prev; + struct sk_buff *skb = READ_ONCE(list_->prev); if (skb == (struct sk_buff *)list_) skb = NULL; @@ -1861,7 +1861,9 @@ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { - /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */ + /* See skb_queue_empty_lockless() and skb_peek_tail() + * for the opposite READ_ONCE() + */ WRITE_ONCE(newsk->next, next); WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(next->prev, newsk); -- cgit v1.2.3 From 5bd90b0989731520f2cdcfbbe467f1271f3cc803 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Nov 2019 16:04:11 +0000 Subject: KVM: vgic-v4: Track the number of VLPIs per vcpu In order to find out whether a vcpu is likely to be the target of VLPIs (and to further optimize the way we deal with those), let's track the number of VLPIs a vcpu can receive. This gets implemented with an atomic variable that gets incremented or decremented on map, unmap and move of a VLPI. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Reviewed-by: Christoffer Dall Link: https://lore.kernel.org/r/20191107160412.30301-2-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index ab1396afe08a..5dbcfc65f21e 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -32,6 +32,8 @@ struct its_vm { struct its_vpe { struct page *vpt_page; struct its_vm *its_vm; + /* per-vPE VLPI tracking */ + atomic_t vlpi_count; /* Doorbell interrupt */ int irq; irq_hw_number_t vpe_db_lpi; -- cgit v1.2.3 From bc9ad9e40dbc4c8874e806345df393a9cfeadad3 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Nov 2019 09:33:02 +0000 Subject: EDAC: Replace EDAC_DIMM_PTR() macro with edac_get_dimm() function The EDAC_DIMM_PTR() macro takes 3 arguments from struct mem_ctl_info. Clean up this interface to only pass the mci struct and replace this macro with a new function edac_get_dimm(). Also introduce an edac_get_dimm_by_index() function for later use. This allows it to get a DIMM pointer only by a given index. This can be useful if the DIMM's position within the layers of the memory controller or the exact size of the layers are unknown. Small style changes made for some hunks after applying the semantic patch. Semantic patch used: @@ expression mci, a, b,c; @@ -EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, a, b, c) +edac_get_dimm(mci, a, b, c) [ bp: Touchups. ] Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Jason Baron Cc: Qiuxu Zhuo Cc: Tero Kristo Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106093239.25517-2-rrichter@marvell.com --- include/linux/edac.h | 89 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index c19483b90079..280d109b9d05 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -403,37 +403,6 @@ struct edac_mc_layer { __i; \ }) -/** - * EDAC_DIMM_PTR - Macro responsible to get a pointer inside a pointer array - * for the element given by [layer0,layer1,layer2] position - * - * @layers: a struct edac_mc_layer array, describing how many elements - * were allocated for each layer - * @var: name of the var where we want to get the pointer - * (like mci->dimms) - * @nlayers: Number of layers at the @layers array - * @layer0: layer0 position - * @layer1: layer1 position. Unused if n_layers < 2 - * @layer2: layer2 position. Unused if n_layers < 3 - * - * For 1 layer, this macro returns "var[layer0]"; - * - * For 2 layers, this macro is similar to allocate a bi-dimensional array - * and to return "var[layer0][layer1]"; - * - * For 3 layers, this macro is similar to allocate a tri-dimensional array - * and to return "var[layer0][layer1][layer2]"; - */ -#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \ - typeof(*var) __p; \ - int ___i = EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2); \ - if (___i < 0) \ - __p = NULL; \ - else \ - __p = (var)[___i]; \ - __p; \ -}) - struct dimm_info { struct device dev; @@ -669,4 +638,60 @@ struct mem_ctl_info { bool fake_inject_ue; u16 fake_inject_count; }; -#endif + +/** + * edac_get_dimm_by_index - Get DIMM info at @index from a memory + * controller + * + * @mci: MC descriptor struct mem_ctl_info + * @index: index in the memory controller's DIMM array + * + * Returns a struct dimm_info * or NULL on failure. + */ +static inline struct dimm_info * +edac_get_dimm_by_index(struct mem_ctl_info *mci, int index) +{ + if (index < 0 || index >= mci->tot_dimms) + return NULL; + + return mci->dimms[index]; +} + +/** + * edac_get_dimm - Get DIMM info from a memory controller given by + * [layer0,layer1,layer2] position + * + * @mci: MC descriptor struct mem_ctl_info + * @layer0: layer0 position + * @layer1: layer1 position. Unused if n_layers < 2 + * @layer2: layer2 position. Unused if n_layers < 3 + * + * For 1 layer, this function returns "dimms[layer0]"; + * + * For 2 layers, this function is similar to allocating a two-dimensional + * array and returning "dimms[layer0][layer1]"; + * + * For 3 layers, this function is similar to allocating a tri-dimensional + * array and returning "dimms[layer0][layer1][layer2]"; + */ +static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, + int layer0, int layer1, int layer2) +{ + int index; + + if (layer0 < 0 + || (mci->n_layers > 1 && layer1 < 0) + || (mci->n_layers > 2 && layer2 < 0)) + return NULL; + + index = layer0; + + if (mci->n_layers > 1) + index = index * mci->layers[1].size + layer1; + + if (mci->n_layers > 2) + index = index * mci->layers[2].size + layer2; + + return edac_get_dimm_by_index(mci, index); +} +#endif /* _LINUX_EDAC_H_ */ -- cgit v1.2.3 From 977b1ce7c117905b3138dc727ed25f8af2ba2902 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Nov 2019 09:33:04 +0000 Subject: EDAC: Remove EDAC_DIMM_OFF() macro The EDAC_DIMM_OFF() macro takes 5 arguments to get the DIMM's index. Simplify this by storing the index in struct dimm_info to avoid its calculation and remove the EDAC_DIMM_OFF() macro. The index can be directly used then. Another advantage is that edac_mc_alloc() could be used even if the exact size of the layers is unknown. Only the number of DIMMs would be needed. Rename iterator variable to idx, while at it. The name is more handy, esp. when searching for it in the code. Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106093239.25517-3-rrichter@marvell.com --- include/linux/edac.h | 45 ++++----------------------------------------- 1 file changed, 4 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 280d109b9d05..f4ebb14bc406 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -362,47 +362,6 @@ struct edac_mc_layer { */ #define EDAC_MAX_LAYERS 3 -/** - * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer - * array for the element given by [layer0,layer1,layer2] - * position - * - * @layers: a struct edac_mc_layer array, describing how many elements - * were allocated for each layer - * @nlayers: Number of layers at the @layers array - * @layer0: layer0 position - * @layer1: layer1 position. Unused if n_layers < 2 - * @layer2: layer2 position. Unused if n_layers < 3 - * - * For 1 layer, this macro returns "var[layer0] - var"; - * - * For 2 layers, this macro is similar to allocate a bi-dimensional array - * and to return "var[layer0][layer1] - var"; - * - * For 3 layers, this macro is similar to allocate a tri-dimensional array - * and to return "var[layer0][layer1][layer2] - var". - * - * A loop could be used here to make it more generic, but, as we only have - * 3 layers, this is a little faster. - * - * By design, layers can never be 0 or more than 3. If that ever happens, - * a NULL is returned, causing an OOPS during the memory allocation routine, - * with would point to the developer that he's doing something wrong. - */ -#define EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2) ({ \ - int __i; \ - if ((nlayers) == 1) \ - __i = layer0; \ - else if ((nlayers) == 2) \ - __i = (layer1) + ((layers[1]).size * (layer0)); \ - else if ((nlayers) == 3) \ - __i = (layer2) + ((layers[2]).size * ((layer1) + \ - ((layers[1]).size * (layer0)))); \ - else \ - __i = -EINVAL; \ - __i; \ -}) - struct dimm_info { struct device dev; @@ -412,6 +371,7 @@ struct dimm_info { unsigned int location[EDAC_MAX_LAYERS]; struct mem_ctl_info *mci; /* the parent */ + unsigned int idx; /* index within the parent dimm array */ u32 grain; /* granularity of reported error in bytes */ enum dev_type dtype; /* memory device type */ @@ -654,6 +614,9 @@ edac_get_dimm_by_index(struct mem_ctl_info *mci, int index) if (index < 0 || index >= mci->tot_dimms) return NULL; + if (WARN_ON_ONCE(mci->dimms[index]->idx != index)) + return NULL; + return mci->dimms[index]; } -- cgit v1.2.3 From 5bd54539788b3b3a415e84204cc89f918658d56d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 22 Oct 2019 18:48:06 -0500 Subject: soundwire: remove bitfield for unique_id, use u8 There is no good reason why the unique_id needs to be stored as 4 bits. The code will work without changes with a u8 since all values are already filtered while parsing the ACPI tables and Slave devID registers. Use u8 representation. This will allow us to encode a "IGNORE_UNIQUE_ID" value to account for firmware/BIOS creativity. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20191022234808.17432-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 688b40e65c89..28745b9ba279 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -403,6 +403,8 @@ int sdw_slave_read_prop(struct sdw_slave *slave); * SDW Slave Structures and APIs */ +#define SDW_IGNORED_UNIQUE_ID 0xFF + /** * struct sdw_slave_id - Slave ID * @mfg_id: MIPI Manufacturer ID @@ -418,7 +420,7 @@ struct sdw_slave_id { __u16 mfg_id; __u16 part_id; __u8 class_id; - __u8 unique_id:4; + __u8 unique_id; __u8 sdw_version:4; }; -- cgit v1.2.3 From 727b3668b730634228fc65c336c2a7a080e02885 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Nov 2019 17:39:29 +0000 Subject: net: sfp: rework upstream interface The current upstream interface is an all-or-nothing, which is sub-optimal for future changes, as it doesn't allow the upstream driver to prepare for the SFP module becoming available, as it is at boot. Switch to a find-sfp-bus, add-upstream, del-upstream, put-sfp-bus interface structure instead, which allows the upstream driver to prepare for a module being available as soon as add-upstream is called. Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/sfp.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 355a08a76fd4..c8464de7cff5 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -508,10 +508,11 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, u8 *data); void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); -struct sfp_bus *sfp_register_upstream_node(struct fwnode_handle *fwnode, - void *upstream, - const struct sfp_upstream_ops *ops); -void sfp_unregister_upstream(struct sfp_bus *bus); +void sfp_bus_put(struct sfp_bus *bus); +struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode); +int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, + const struct sfp_upstream_ops *ops); +void sfp_bus_del_upstream(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, @@ -553,14 +554,22 @@ static inline void sfp_upstream_stop(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_register_upstream_node( - struct fwnode_handle *fwnode, void *upstream, - const struct sfp_upstream_ops *ops) +static inline void sfp_bus_put(struct sfp_bus *bus) +{ +} + +static inline struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) { return NULL; } -static inline void sfp_unregister_upstream(struct sfp_bus *bus) +static int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, + const struct sfp_upstream_ops *ops) +{ + return 0; +} + +static inline void sfp_bus_del_upstream(struct sfp_bus *bus) { } #endif -- cgit v1.2.3 From c498afaf7df87f44e7cb383c135baec52b5259be Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Nov 2019 09:33:07 +0000 Subject: EDAC: Introduce an mci_for_each_dimm() iterator Introduce an mci_for_each_dimm() iterator. It returns a pointer to a struct dimm_info. This makes the declaration and use of an index obsolete and avoids access to internal data of struct mci (direct array access etc). [ bp: push the struct dimm_info *dimm; declaration into the CONFIG_EDAC_DEBUG block. ] Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106093239.25517-4-rrichter@marvell.com --- include/linux/edac.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index f4ebb14bc406..31f99d48b024 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -599,6 +599,13 @@ struct mem_ctl_info { u16 fake_inject_count; }; +#define mci_for_each_dimm(mci, dimm) \ + for ((dimm) = (mci)->dimms[0]; \ + (dimm); \ + (dimm) = (dimm)->idx + 1 < (mci)->tot_dimms \ + ? (mci)->dimms[(dimm)->idx + 1] \ + : NULL) + /** * edac_get_dimm_by_index - Get DIMM info at @index from a memory * controller -- cgit v1.2.3 From 98edb865bd3ee2a67e51e0d947208f3a2129a460 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Nov 2019 09:33:18 +0000 Subject: EDAC: Remove misleading comment in struct edac_raw_error_desc There never has been such function edac_raw_error_desc_clean() and in function ghes_edac_report_mem_error() the whole struct is zero'ed including the string arrays. Remove that comment. Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106093239.25517-9-rrichter@marvell.com --- include/linux/edac.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 31f99d48b024..cc31b9742684 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -457,15 +457,10 @@ struct errcount_attribute_data { * (typically, a memory controller error) */ struct edac_raw_error_desc { - /* - * NOTE: everything before grain won't be cleaned by - * edac_raw_error_desc_clean() - */ char location[LOCATION_SIZE]; char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; long grain; - /* the vars below and grain will be cleaned on every new error report */ u16 error_count; int top_layer; int mid_layer; -- cgit v1.2.3 From ffedbf0cba153c91a0da5d1280a5e639664c5ab3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 8 Nov 2019 16:57:59 +0000 Subject: irqchip/gic-v3-its: Kill its->ite_size and use TYPER copy instead Now that we have a copy of TYPER in the ITS structure, rely on this to provide the same service as its->ite_size, which gets axed. Errata workarounds are now updating the cached fields instead of requiring a separate field in the ITS structure. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20191027144234.8395-6-maz@kernel.org Link: https://lore.kernel.org/r/20191108165805.3071-6-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 5cc10cf7cb3e..4bce7a904075 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -334,7 +334,7 @@ #define GITS_TYPER_PLPIS (1UL << 0) #define GITS_TYPER_VLPIS (1UL << 1) #define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 -#define GITS_TYPER_ITT_ENTRY_SIZE(r) ((((r) >> GITS_TYPER_ITT_ENTRY_SIZE_SHIFT) & 0xf) + 1) +#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) -- cgit v1.2.3 From 576a83429757999f220f36f206044af2b9026672 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 8 Nov 2019 16:58:00 +0000 Subject: irqchip/gic-v3-its: Kill its->device_ids and use TYPER copy instead Now that we have a copy of TYPER in the ITS structure, rely on this to provide the same service as its->device_ids, which gets axed. Errata workarounds are now updating the cached fields instead of requiring a separate field in the ITS structure. Signed-off-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20191027144234.8395-7-maz@kernel.org Link: https://lore.kernel.org/r/20191108165805.3071-7-maz@kernel.org --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 4bce7a904075..b6514e8893bf 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -337,7 +337,7 @@ #define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 -#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) +#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) #define GITS_TYPER_PTA (1UL << 19) #define GITS_TYPER_HCC_SHIFT 24 #define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) -- cgit v1.2.3 From 20b44b4de61f2887694981e8cae74fe1bf58f950 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 2 Oct 2019 19:25:21 +0800 Subject: irqchip: ingenic: Drop redundant irq_suspend / irq_resume functions The same behaviour can be obtained by using the IRQCHIP_MASK_ON_SUSPEND flag on the IRQ chip. Signed-off-by: Paul Cercueil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1570015525-27018-2-git-send-email-zhouyanjie@zoho.com --- include/linux/irqchip/ingenic.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/irqchip/ingenic.h (limited to 'include/linux') diff --git a/include/linux/irqchip/ingenic.h b/include/linux/irqchip/ingenic.h deleted file mode 100644 index 146558853ad4..000000000000 --- a/include/linux/irqchip/ingenic.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2010, Lars-Peter Clausen - */ - -#ifndef __LINUX_IRQCHIP_INGENIC_H__ -#define __LINUX_IRQCHIP_INGENIC_H__ - -#include - -extern void ingenic_intc_irq_suspend(struct irq_data *data); -extern void ingenic_intc_irq_resume(struct irq_data *data); - -#endif -- cgit v1.2.3 From 25cb20a212a1f989385dfe23230817e69c62bee5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 16 Oct 2019 16:57:53 +0200 Subject: PM / OPP: Support adjusting OPP voltages at runtime On some SoCs the Adaptive Voltage Scaling (AVS) technique is employed to optimize the operating voltage of a device. At a given frequency, the hardware monitors dynamic factors and either makes a suggestion for how much to adjust a voltage for the current frequency, or it automatically adjusts the voltage without software intervention. Add an API to the OPP library for the former case, so that AVS type devices can update the voltages for an OPP when the hardware determines the voltage should change. The assumption is that drivers like CPUfreq or devfreq will register for the OPP notifiers and adjust the voltage according to suggestions that AVS makes. This patch is derived from [1] submitted by Stephen. [1] https://lore.kernel.org/patchwork/patch/599279/ Signed-off-by: Stephen Boyd [Roger Lu: Changed to rcu less implementation] Signed-off-by: Roger Lu [s.nawrocki@samsung.com: added handling of OPP min/max voltage] Signed-off-by: Sylwester Nawrocki Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index b8197ab014f2..747861816f4f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -22,6 +22,7 @@ struct opp_table; enum dev_pm_opp_event { OPP_EVENT_ADD, OPP_EVENT_REMOVE, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE, + OPP_EVENT_ADJUST_VOLTAGE, }; /** @@ -113,6 +114,10 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, void dev_pm_opp_remove(struct device *dev, unsigned long freq); void dev_pm_opp_remove_all_dynamic(struct device *dev); +int dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max); + int dev_pm_opp_enable(struct device *dev, unsigned long freq); int dev_pm_opp_disable(struct device *dev, unsigned long freq); @@ -242,6 +247,14 @@ static inline void dev_pm_opp_remove_all_dynamic(struct device *dev) { } +static inline int +dev_pm_opp_adjust_voltage(struct device *dev, unsigned long freq, + unsigned long u_volt, unsigned long u_volt_min, + unsigned long u_volt_max) +{ + return 0; +} + static inline int dev_pm_opp_enable(struct device *dev, unsigned long freq) { return 0; -- cgit v1.2.3 From 39d1e3340c73e8f7eb1d6a8cae561c255ca7b1b0 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 7 Nov 2019 08:41:51 +0000 Subject: mtd: spi-nor: Fix clearing of QE bit on lock()/unlock() Make sure that when doing a lock() or an unlock() operation we don't clear the QE bit from Status Register 2. JESD216 revB or later offers information about the *default* Status Register commands to use (see BFPT DWORDS[15], bits 22:20). In this standard, Status Register 1 refers to the first data byte transferred on a Read Status (05h) or Write Status (01h) command. Status register 2 refers to the byte read using instruction 35h. Status register 2 is the second byte transferred in a Write Status (01h) command. Industry naming and definitions of these Status Registers may differ. The definitions are described in JESD216B, BFPT DWORDS[15], bits 22:20. There are cases in which writing only one byte to the Status Register 1 has the side-effect of clearing Status Register 2 and implicitly the Quad Enable bit. This side-effect is hit just by the BFPT_DWORD15_QER_SR2_BIT1_BUGGY and BFPT_DWORD15_QER_SR2_BIT1 cases. Suggested-by: Boris Brezillon Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index d1d736d3c8ab..d6ec55cc6d97 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -243,6 +243,9 @@ enum spi_nor_option_flags { SNOR_F_4B_OPCODES = BIT(6), SNOR_F_HAS_4BAIT = BIT(7), SNOR_F_HAS_LOCK = BIT(8), + SNOR_F_HAS_16BIT_SR = BIT(9), + SNOR_F_NO_READ_CR = BIT(10), + }; /** -- cgit v1.2.3 From 3e0930f109e76922ea1742a9c8c1cc16f052ad45 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 7 Nov 2019 08:41:55 +0000 Subject: mtd: spi-nor: Rework the disabling of block write protection spi_nor_unlock() unlocks blocks of memory or the entire flash memory array, if requested. clear_sr_bp() unlocks the entire flash memory array at boot time. This calls for some unification, clear_sr_bp() is just an optimization for the case when the unlock request covers the entire flash size. Get rid of clear_sr_bp() and introduce spi_nor_unlock_all(), which is just a call to spi_nor_unlock() for the entire flash memory array. This fixes a bug that was present in spi_nor_spansion_clear_sr_bp(). When the QE bit was zero, we used the Write Status (01h) command with one data byte, which might cleared the Status Register 2. We now always use the Write Status (01h) command with two data bytes when SNOR_F_HAS_16BIT_SR is set, to avoid clearing the Status Register 2. The SNOR_F_NO_READ_CR case is treated as well. When the flash doesn't support the CR Read command, we make an assumption about the value of the QE bit. In spi_nor_init(), call spi_nor_quad_enable() first, then spi_nor_unlock_all(), so that at the spi_nor_unlock_all() time we can be sure the QE bit has value one, because of the previous call to spi_nor_quad_enable(). Get rid of the MFR handling and implement specific manufacturer default_init() fixup hooks. Note that this changes a bit the logic for the SNOR_MFR_ATMEL, SNOR_MFR_INTEL and SNOR_MFR_SST cases. Before this patch, the Atmel, Intel and SST chips did not set the locking ops, but unlocked the entire flash at boot time, while now they are setting the locking ops to stm_locking_ops. This should work, since the disable of the block protection at the boot time used the same Status Register bits to unlock the flash, as in the stm_locking_ops case. Suggested-by: Boris Brezillon Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index d6ec55cc6d97..11daecc5a83d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -581,8 +581,6 @@ struct flash_info; * @write_proto: the SPI protocol for write operations * @reg_proto the SPI protocol for read_reg/write_reg/erase operations * @controller_ops: SPI NOR controller driver specific operations. - * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from - * the SPI NOR Status Register. * @params: [FLASH-SPECIFIC] SPI-NOR flash parameters and settings. * The structure includes legacy flash parameters and * settings that can be overwritten by the spi_nor_fixups @@ -611,7 +609,6 @@ struct spi_nor { const struct spi_nor_controller_ops *controller_ops; - int (*clear_sr_bp)(struct spi_nor *nor); struct spi_nor_flash_parameter params; void *priv; -- cgit v1.2.3 From bb2dc7f46ad897ba1c2d8ae773c77601ba240932 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 7 Nov 2019 08:42:01 +0000 Subject: mtd: spi-nor: Rename CR_QUAD_EN_SPAN to SR2_QUAD_EN_BIT1 JEDEC Basic Flash Parameter Table, 15th DWORD, bits 22:20, refers to this bit as "bit 1 of the status register 2". Rename the macro accordingly. Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 11daecc5a83d..364309845de0 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -144,10 +144,8 @@ #define FSR_P_ERR BIT(4) /* Program operation status */ #define FSR_PT_ERR BIT(1) /* Protection error bit */ -/* Configuration Register bits. */ -#define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ - /* Status Register 2 bits. */ +#define SR2_QUAD_EN_BIT1 BIT(1) #define SR2_QUAD_EN_BIT7 BIT(7) /* Supported SPI protocols */ -- cgit v1.2.3 From 658488ed2108f5772572c5a17c3f31ed6e554edc Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 7 Nov 2019 08:42:09 +0000 Subject: mtd: spi-nor: Rename Quad Enable methods Rename macronix_quad_enable() to a generic name: spi_nor_sr1_bit6_quad_enable(). Prepend "spi_nor_" to "sr2_bit7_quad_enable". All SPI NOR generic methods should be prepended by "spi_nor_". Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 364309845de0..9eae35c60bce 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -133,7 +133,7 @@ #define SR_E_ERR BIT(5) #define SR_P_ERR BIT(6) -#define SR_QUAD_EN_MX BIT(6) /* Macronix Quad I/O */ +#define SR1_QUAD_EN_BIT6 BIT(6) /* Enhanced Volatile Configuration Register bits */ #define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */ -- cgit v1.2.3 From 153bedbac2ebd475e1c7c2d2fa0c042f5525927d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 8 Nov 2019 17:08:55 +0100 Subject: irq_work: Convert flags to atomic_t We need to convert flags to atomic_t in order to later fix an ordering issue on atomic_cmpxchg() failure. This will allow us to use atomic_fetch_or(). Also clarify the nature of those flags. [ mingo: Converted two more usage site the original patch missed. ] Signed-off-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191108160858.31665-2-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index b11fcdfd0770..02da997ad12c 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -22,7 +22,7 @@ #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) struct irq_work { - unsigned long flags; + atomic_t flags; struct llist_node llnode; void (*func)(struct irq_work *); }; @@ -30,11 +30,15 @@ struct irq_work { static inline void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - work->flags = 0; + atomic_set(&work->flags, 0); work->func = func; } -#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } +#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { \ + .flags = ATOMIC_INIT(0), \ + .func = (_f) \ +} + bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); -- cgit v1.2.3 From e537654b7039aacfe8ae629d49655c0e5692ad44 Mon Sep 17 00:00:00 2001 From: Tuowen Zhao Date: Wed, 16 Oct 2019 15:06:28 -0600 Subject: lib: devres: add a helper function for ioremap_uc Implement a resource managed strongly uncachable ioremap function. Cc: # v4.19+ Tested-by: AceLan Kao Signed-off-by: Tuowen Zhao Acked-by: Mika Westerberg Acked-by: Andy Shevchenko Acked-by: Luis Chamberlain Signed-off-by: Lee Jones --- include/linux/io.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index accac822336a..a59834bc0a11 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -64,6 +64,8 @@ static inline void devm_ioport_unmap(struct device *dev, void __iomem *addr) void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, resource_size_t size); +void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, + resource_size_t size); void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, -- cgit v1.2.3 From 37ef8c2c15bdc1322b160e38986c187de2b877b2 Mon Sep 17 00:00:00 2001 From: Daniel Schultz Date: Tue, 17 Sep 2019 10:12:53 +0200 Subject: mfd: rk808: Fix RK818 ID template The Rockchip PMIC driver can automatically detect connected component versions by reading the ID_MSB and ID_LSB registers. The probe function will always fail with RK818 PMICs because the ID_MSK is 0xFFF0 and the RK818 template ID is 0x8181. This patch changes this value to 0x8180. Fixes: 9d6105e19f61 ("mfd: rk808: Fix up the chip id get failed") Cc: stable@vger.kernel.org Cc: Elaine Zhang Cc: Joseph Chen Signed-off-by: Daniel Schultz Signed-off-by: Heiko Stuebner Signed-off-by: Lee Jones --- include/linux/mfd/rk808.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 7cfd2b0504df..a59bf323f713 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -610,7 +610,7 @@ enum { RK808_ID = 0x0000, RK809_ID = 0x8090, RK817_ID = 0x8170, - RK818_ID = 0x8181, + RK818_ID = 0x8180, }; struct rk808 { -- cgit v1.2.3 From 393f05f1d4651f7e4661a8739c381300dd9e39dc Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 9 Feb 2016 14:08:27 +0000 Subject: mfd: Provide MACRO to declare commonly defined MFD cell attributes Signed-off-by: Lee Jones Acked-by: Laxman Dewangan Tested-by: Laxman Dewangan --- include/linux/mfd/core.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index b43fc5773ad7..9a97e0932592 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -12,6 +12,35 @@ #include +#define MFD_RES_SIZE(arr) (sizeof(arr) / sizeof(struct resource)) + +#define MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, _match)\ + { \ + .name = (_name), \ + .resources = (_res), \ + .num_resources = MFD_RES_SIZE((_res)), \ + .platform_data = (_pdata), \ + .pdata_size = (_pdsize), \ + .of_compatible = (_compat), \ + .acpi_match = (_match), \ + .id = (_id), \ + } + +#define OF_MFD_CELL(_name, _res, _pdata, _pdsize,_id, _compat) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, _compat, NULL) \ + +#define ACPI_MFD_CELL(_name, _res, _pdata, _pdsize, _id, _match) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, _match) \ + +#define MFD_CELL_BASIC(_name, _res, _pdata, _pdsize, _id) \ + MFD_CELL_ALL(_name, _res, _pdata, _pdsize, _id, NULL, NULL) \ + +#define MFD_CELL_RES(_name, _res) \ + MFD_CELL_ALL(_name, _res, NULL, 0, 0, NULL, NULL) \ + +#define MFD_CELL_NAME(_name) \ + MFD_CELL_ALL(_name, NULL, NULL, 0, 0, NULL, NULL) \ + struct irq_domain; struct property_entry; -- cgit v1.2.3 From 7f0e60c751dc7252276230c7f8def447ffc0af4e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 2 Oct 2019 16:43:18 +0200 Subject: mfd: max77620: Do not allocate IRQs upfront regmap_add_irq_chip() will try to allocate all of the IRQ descriptors upfront if passed a non-zero irq_base parameter. However, the intention is to allocate IRQ descriptors on an as-needed basis if possible. Pass 0 instead of -1 to fix that use-case. Signed-off-by: Thierry Reding Signed-off-by: Lee Jones --- include/linux/mfd/max77620.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77620.h b/include/linux/mfd/max77620.h index 12ba157cb83f..f552ef5b1100 100644 --- a/include/linux/mfd/max77620.h +++ b/include/linux/mfd/max77620.h @@ -329,7 +329,6 @@ struct max77620_chip { struct regmap *rmap; int chip_irq; - int irq_base; /* chip id */ enum max77620_chip_id chip_id; -- cgit v1.2.3 From cbfdc839ea913250bd38528408addf44b27e9e5f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Oct 2019 10:30:15 +0100 Subject: mfd: twl: Endian fixups in i2c write and read wrappers Use a local variable to ensure correct endian types for intermediate results. Identified by sparse when building the IIO driver. Signed-off-by: Jonathan Cameron Signed-off-by: Lee Jones --- include/linux/mfd/twl.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index 44aff52a5002..089e8942223a 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -181,14 +181,18 @@ static inline int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg) { } static inline int twl_i2c_write_u16(u8 mod_no, u16 val, u8 reg) { - val = cpu_to_le16(val); - return twl_i2c_write(mod_no, (u8*) &val, reg, 2); + __le16 value; + + value = cpu_to_le16(val); + return twl_i2c_write(mod_no, (u8 *) &value, reg, 2); } static inline int twl_i2c_read_u16(u8 mod_no, u16 *val, u8 reg) { int ret; - ret = twl_i2c_read(mod_no, (u8*) val, reg, 2); - *val = le16_to_cpu(*val); + __le16 value; + + ret = twl_i2c_read(mod_no, (u8 *) &value, reg, 2); + *val = le16_to_cpu(value); return ret; } -- cgit v1.2.3 From 28eafe9162b6a8b7d0266889d55567ba5a7809d5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 21 Oct 2019 14:58:11 +0100 Subject: mfd: wm8998: Remove some unused registers Save a few bytes by removing some registers from the driver that are not currently used and not intended to be used at any point in the future. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/registers.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index bb1a2530ae27..49e24d1de8d4 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1186,13 +1186,6 @@ #define ARIZONA_DSP4_SCRATCH_1 0x1441 #define ARIZONA_DSP4_SCRATCH_2 0x1442 #define ARIZONA_DSP4_SCRATCH_3 0x1443 -#define ARIZONA_FRF_COEFF_1 0x1700 -#define ARIZONA_FRF_COEFF_2 0x1701 -#define ARIZONA_FRF_COEFF_3 0x1702 -#define ARIZONA_FRF_COEFF_4 0x1703 -#define ARIZONA_V2_DAC_COMP_1 0x1704 -#define ARIZONA_V2_DAC_COMP_2 0x1705 - /* * Field Definitions. -- cgit v1.2.3 From 1e624fce3a1ca03fcea167cc43399d0073472edc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 21 Oct 2019 14:58:13 +0100 Subject: mfd: madera: Add support for requesting the supply clocks Add the ability to get the clock for each clock input pin of the chip and enable MCLK2 since that is expected to be a permanently enabled 32kHz clock. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/madera/core.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h index 7ffa696cce7c..ad2c138105d4 100644 --- a/include/linux/mfd/madera/core.h +++ b/include/linux/mfd/madera/core.h @@ -8,6 +8,7 @@ #ifndef MADERA_CORE_H #define MADERA_CORE_H +#include #include #include #include @@ -29,6 +30,13 @@ enum madera_type { CS42L92 = 9, }; +enum { + MADERA_MCLK1, + MADERA_MCLK2, + MADERA_MCLK3, + MADERA_NUM_MCLK +}; + #define MADERA_MAX_CORE_SUPPLIES 2 #define MADERA_MAX_GPIOS 40 @@ -155,6 +163,7 @@ struct snd_soc_dapm_context; * @irq_dev: the irqchip child driver device * @irq_data: pointer to irqchip data for the child irqchip driver * @irq: host irq number from SPI or I2C configuration + * @mclk: Structure holding clock supplies * @out_clamp: indicates output clamp state for each analogue output * @out_shorted: indicates short circuit state for each analogue output * @hp_ena: bitflags of enable state for the headphone outputs @@ -184,6 +193,8 @@ struct madera { struct regmap_irq_chip_data *irq_data; int irq; + struct clk_bulk_data mclk[MADERA_NUM_MCLK]; + unsigned int num_micbias; unsigned int num_childbias[MADERA_MAX_MICBIAS]; -- cgit v1.2.3 From ead1c83ddd7613d9e61368dc686d014e37955192 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 18 Oct 2019 13:31:39 +0100 Subject: mfd: mfd-core: Remove mfd_clone_cell() Providing a subsystem-level API helper seems over-kill just to save a few lines of C-code. Previous commits saw us convert mfd_clone_cell()'s only user over to use a more traditional style of MFD child-device registration. Now we can remove the superfluous helper from the MFD API. Signed-off-by: Lee Jones Reviewed-by: Daniel Thompson --- include/linux/mfd/core.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 9a97e0932592..63ac3cc86608 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -115,24 +115,6 @@ struct mfd_cell { extern int mfd_cell_enable(struct platform_device *pdev); extern int mfd_cell_disable(struct platform_device *pdev); -/* - * "Clone" multiple platform devices for a single cell. This is to be used - * for devices that have multiple users of a cell. For example, if an mfd - * driver wants the cell "foo" to be used by a GPIO driver, an MTD driver, - * and a platform driver, the following bit of code would be use after first - * calling mfd_add_devices(): - * - * const char *fclones[] = { "foo-gpio", "foo-mtd" }; - * err = mfd_clone_cells("foo", fclones, ARRAY_SIZE(fclones)); - * - * Each driver (MTD, GPIO, and platform driver) would then register - * platform_drivers for "foo-mtd", "foo-gpio", and "foo", respectively. - * The cell's .enable/.disable hooks should be used to deal with hardware - * resource contention. - */ -extern int mfd_clone_cell(const char *cell, const char **clones, - size_t n_clones); - /* * Given a platform device that's been created by mfd_add_devices(), fetch * the mfd_cell that created it. -- cgit v1.2.3 From 5a47c0fbd276b7f57bd38f153e8b15784b2f6f22 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 21 Oct 2019 10:47:37 +0100 Subject: mfd: mfd-core: Remove usage counting for .{en,dis}able() call-backs The MFD implementation for reference counting was complex and unnecessary. There was only one bona fide user which has now been converted to handle the process in a different way. Any future resource protection, shared enablement functions should be handed by the parent device, rather than through the MFD subsystem API. Signed-off-by: Lee Jones Reviewed-by: Daniel Thompson Reviewed-by: Mark Brown --- include/linux/mfd/core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 63ac3cc86608..d01d1299e49d 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -59,8 +59,6 @@ struct mfd_cell { const char *name; int id; - /* refcounting for multiple drivers to use a single cell */ - atomic_t *usage_count; int (*enable)(struct platform_device *dev); int (*disable)(struct platform_device *dev); -- cgit v1.2.3 From 22fb3ad0cc5f578398953ddcab9c8239a08caccd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 26 Oct 2019 23:47:32 +0200 Subject: mfd: db8500-prcmu: Support U8420-sysclk firmware There is a distinct version of the Ux500 U8420 variant with "sysclk", as can be seen from the vendor code that didn't make it upstream, this firmware lacks the ULPPLL (ultra-low power phase locked loop) which in effect means that the timer clock is instead wired to the 32768 Hz always-on clock. This has some repercussions when enabling the timer clock as the code as it stands will disable the timer clock on these platforms (lacking the so-called "doze mode") and obtaining the wrong rate of the timer clock. The timer frequency is of course needed very early in the boot, and as a consequence, we need to shuffle around the early PRCMU init code: whereas in the past we did not need to look up the PRCMU firmware version in the early init, but now we need to know the version before the core system timers are registered so we restructure the platform callbacks to the PRCMU so as not to take any arguments and instead look up the resources it needs directly from the device tree when initializing. As we do not yet support any platforms using this firmware it is not a regression, but as PostmarketOS is starting to support products with this firmware we need to fix this up. The low rate of 32kHz also makes the MTU timer unsuitable as delay timer but this needs to be fixed in a separate patch. Signed-off-by: Linus Walleij Reviewed-by: Stephan Gerhold Acked-by: Olof Johansson Signed-off-by: Lee Jones --- include/linux/mfd/db8500-prcmu.h | 4 ++-- include/linux/mfd/dbx500-prcmu.h | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h index 813710aa2cfd..1fc75d2b4a38 100644 --- a/include/linux/mfd/db8500-prcmu.h +++ b/include/linux/mfd/db8500-prcmu.h @@ -489,7 +489,7 @@ struct prcmu_auto_pm_config { #ifdef CONFIG_MFD_DB8500_PRCMU -void db8500_prcmu_early_init(u32 phy_base, u32 size); +void db8500_prcmu_early_init(void); int prcmu_set_rc_a2p(enum romcode_write); enum romcode_read prcmu_get_rc_p2a(void); enum ap_pwrst prcmu_get_xp70_current_state(void); @@ -546,7 +546,7 @@ void db8500_prcmu_write_masked(unsigned int reg, u32 mask, u32 value); #else /* !CONFIG_MFD_DB8500_PRCMU */ -static inline void db8500_prcmu_early_init(u32 phy_base, u32 size) {} +static inline void db8500_prcmu_early_init(void) {} static inline int prcmu_set_rc_a2p(enum romcode_write code) { diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index 238401a50d0b..e2571040c7e8 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -190,6 +190,7 @@ enum ddr_pwrst { #define PRCMU_FW_PROJECT_U8500_MBL2 12 /* Customer specific */ #define PRCMU_FW_PROJECT_U8520 13 #define PRCMU_FW_PROJECT_U8420 14 +#define PRCMU_FW_PROJECT_U8420_SYSCLK 17 #define PRCMU_FW_PROJECT_A9420 20 /* [32..63] 9540 and derivatives */ #define PRCMU_FW_PROJECT_U9540 32 @@ -211,9 +212,9 @@ struct prcmu_fw_version { #if defined(CONFIG_UX500_SOC_DB8500) -static inline void prcmu_early_init(u32 phy_base, u32 size) +static inline void prcmu_early_init(void) { - return db8500_prcmu_early_init(phy_base, size); + return db8500_prcmu_early_init(); } static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk, @@ -401,7 +402,7 @@ static inline int prcmu_config_a9wdog(u8 num, bool sleep_auto_off) } #else -static inline void prcmu_early_init(u32 phy_base, u32 size) {} +static inline void prcmu_early_init(void) {} static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk, bool keep_ap_pll) -- cgit v1.2.3 From 34dc0ea6bc960f1f57b2148f01a3f4da23f87013 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Oct 2019 11:01:37 +0100 Subject: dma-direct: provide mmap and get_sgtable method overrides For dma-direct we know that the DMA address is an encoding of the physical address that we can trivially decode. Use that fact to provide implementations that do not need the arch_dma_coherent_to_pfn architecture hook. Note that we still can only support mmap of non-coherent memory only if the architecture provides a way to set an uncached bit in the page tables. This must be true for architectures that use the generic remap helpers, but other architectures can also manually select it. Signed-off-by: Christoph Hellwig Reviewed-by: Max Filippov --- include/linux/dma-direct.h | 7 +++++++ include/linux/dma-noncoherent.h | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index ff3d5edc44b9..bcd953fb1f5a 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -68,5 +68,12 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp, unsigned long attrs); +int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_can_mmap(struct device *dev); +int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index dd3de6d88fc0..e30fca1f1b12 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -41,8 +41,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr); #ifdef CONFIG_MMU /* -- cgit v1.2.3 From b12d66278dd627cbe1ea7c000aa4715aaf8830c8 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 7 Nov 2019 16:06:44 +0100 Subject: dma-direct: check for overflows on 32 bit DMA addresses As seen on the new Raspberry Pi 4 and sta2x11's DMA implementation it is possible for a device configured with 32 bit DMA addresses and a partial DMA mapping located at the end of the address space to overflow. It happens when a higher physical address, not DMAable, is translated to it's DMA counterpart. For example the Raspberry Pi 4, configurable up to 4 GB of memory, has an interconnect capable of addressing the lower 1 GB of physical memory with a DMA offset of 0xc0000000. It transpires that, any attempt to translate physical addresses higher than the first GB will result in an overflow which dma_capable() can't detect as it only checks for addresses bigger then the maximum allowed DMA address. Fix this by verifying in dma_capable() if the DMA address range provided is at any point lower than the minimum possible DMA address on the bus. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index bcd953fb1f5a..6db863c3eb93 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -3,8 +3,11 @@ #define _LINUX_DMA_DIRECT_H 1 #include +#include /* for min_low_pfn */ #include +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else @@ -24,11 +27,16 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { + dma_addr_t end = addr + size - 1; + if (!dev->dma_mask) return false; - return addr + size - 1 <= - min_not_zero(*dev->dma_mask, dev->bus_dma_mask); + if (!IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && + min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) + return false; + + return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ -- cgit v1.2.3 From 6fcbcec9cfc7b3c6a2c1f1a23ebacedff7073e0a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 10 Nov 2019 12:49:06 +0300 Subject: fs/quota: handle overflows of sysctl fs.quota.* and report as unsigned long Quota statistics counted as 64-bit per-cpu counter. Reading sums per-cpu fractions as signed 64-bit int, filters negative values and then reports lower half as signed 32-bit int. Result may looks like: fs.quota.allocated_dquots = 22327 fs.quota.cache_hits = -489852115 fs.quota.drops = -487288718 fs.quota.free_dquots = 22083 fs.quota.lookups = -486883485 fs.quota.reads = 22327 fs.quota.syncs = 335064 fs.quota.writes = 3088689 Values bigger than 2^31-1 reported as negative. All counters except "allocated_dquots" and "free_dquots" are monotonic, thus they should be reported as is without filtering negative values. Kernel doesn't have generic helper for 64-bit sysctl yet, let's use at least unsigned long. Link: https://lore.kernel.org/r/157337934693.2078.9842146413181153727.stgit@buzz Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jan Kara --- include/linux/quota.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quota.h b/include/linux/quota.h index f32dd270b8e3..27aab84fcbaa 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -263,7 +263,7 @@ enum { }; struct dqstats { - int stat[_DQST_DQSTAT_LAST]; + unsigned long stat[_DQST_DQSTAT_LAST]; struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; -- cgit v1.2.3 From 2e7ec69d645210ea8a94cbb91799f57f62418bca Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 22 Oct 2019 10:36:28 +0200 Subject: backlight: gpio: Remove unused fields from platform data Remove the platform data fields that nobody uses. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Daniel Thompson Signed-off-by: Lee Jones --- include/linux/platform_data/gpio_backlight.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/gpio_backlight.h b/include/linux/platform_data/gpio_backlight.h index 34179d600360..1a8b5b1946fe 100644 --- a/include/linux/platform_data/gpio_backlight.h +++ b/include/linux/platform_data/gpio_backlight.h @@ -9,9 +9,6 @@ struct device; struct gpio_backlight_platform_data { struct device *fbdev; - int gpio; - int def_value; - const char *name; }; #endif -- cgit v1.2.3 From d9b86cc48283112f06738d45031b88bd3f9ecb92 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Fri, 16 Aug 2019 12:41:52 -0700 Subject: clk: Add API to get index of the clock parent This patch adds a new clk_hw_get_parent_index() function that can be used to retrieve the index of a given clock's parent. This can be useful for restoring a clock on system resume. Reviewed-by: Thierry Reding Reviewed-by: Dmitry Osipenko Signed-off-by: Sowjanya Komatineni Signed-off-by: Thierry Reding --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 2fdfe8061363..caf4b9df16eb 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -818,6 +818,7 @@ unsigned int clk_hw_get_num_parents(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw); struct clk_hw *clk_hw_get_parent_by_index(const struct clk_hw *hw, unsigned int index); +int clk_hw_get_parent_index(struct clk_hw *hw); int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *new_parent); unsigned int __clk_get_enable_count(struct clk *clk); unsigned long clk_hw_get_rate(const struct clk_hw *hw); -- cgit v1.2.3 From ed1a2459e20c0dfc9d184230c480ace439bececb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 12 Aug 2019 00:00:29 +0300 Subject: clk: tegra: Add Tegra20/30 EMC clock implementation A proper External Memory Controller clock rounding and parent selection functionality is required by the EMC drivers, it is not available using the generic clock implementation because only the Memory Controller driver is aware of what clock rates are actually available for a particular device. EMC drivers will have to register a Tegra-specific CLK-API callback which will perform rounding of a requested rate. EMC clock users won't be able to request EMC clock by getting -EPROBE_DEFER until EMC driver is probed and the callback is set up. The functionality is somewhat similar to the clk-emc.c which serves Tegra124+ SoCs. The later HW generations support more parent clock sources and the HW configuration / integration with the EMC drivers differs a tad from the older gens, hence it's not really worth to try to squash everything into a single source file. Acked-by: Peter De Schrijver Signed-off-by: Dmitry Osipenko Acked-by: Stephen Boyd Signed-off-by: Thierry Reding --- include/linux/clk/tegra.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index b8aef62cc3f5..6a7cbc3cfadc 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -119,4 +119,15 @@ extern void tegra210_put_utmipll_in_iddq(void); extern void tegra210_put_utmipll_out_iddq(void); extern int tegra210_clk_handle_mbist_war(unsigned int id); +struct clk; + +typedef long (tegra20_clk_emc_round_cb)(unsigned long rate, + unsigned long min_rate, + unsigned long max_rate, + void *arg); + +void tegra20_clk_set_emc_round_callback(tegra20_clk_emc_round_cb *round_cb, + void *cb_arg); +int tegra20_clk_prepare_emc_mc_same_freq(struct clk *emc_clk, bool same); + #endif /* __LINUX_CLK_TEGRA_H_ */ -- cgit v1.2.3 From 5699d160550b1e480c920f8182bd4b73b8c9ae43 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 15 Oct 2019 20:00:06 +0300 Subject: clk: tegra: Add missing stubs for the case of !CONFIG_PM_SLEEP The new CPUIDLE driver uses the Tegra's CLK API and that driver won't strictly depend on CONFIG_PM_SLEEP, hence add the required stubs in order to allow compiling of the new driver with the CONFIG_PM_SLEEP=n. Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- include/linux/clk/tegra.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 6a7cbc3cfadc..2b1b35240074 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -108,6 +108,19 @@ static inline void tegra_cpu_clock_resume(void) tegra_cpu_car_ops->resume(); } +#else +static inline bool tegra_cpu_rail_off_ready(void) +{ + return false; +} + +static inline void tegra_cpu_clock_suspend(void) +{ +} + +static inline void tegra_cpu_clock_resume(void) +{ +} #endif extern void tegra210_xusb_pll_hw_control_enable(void); -- cgit v1.2.3 From 6c3a44ed3c553c324845744f30bcd1d3b07d61fd Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 10 Nov 2019 09:27:44 -0800 Subject: iommu/vt-d: Turn off translations at shutdown The intel-iommu driver assumes that the iommu state is cleaned up at the start of the new kernel. But, when we try to kexec boot something other than the Linux kernel, the cleanup cannot be relied upon. Hence, cleanup before we go down for reboot. Keeping the cleanup at initialization also, in case BIOS leaves the IOMMU enabled. I considered turning off iommu only during kexec reboot, but a clean shutdown seems always a good idea. But if someone wants to make it conditional, such as VMM live update, we can do that. There doesn't seem to be such a condition at this time. Tested that before, the info message 'DMAR: Translation was enabled for but we are not in kdump mode' would be reported for each iommu. The message will not appear when the DMA-remapping is not enabled on entry to the kernel. Signed-off-by: Deepa Dinamani Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index a7cf3599d9a1..f64ca27dc210 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -129,6 +129,7 @@ static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg) #ifdef CONFIG_INTEL_IOMMU extern int iommu_detected, no_iommu; extern int intel_iommu_init(void); +extern void intel_iommu_shutdown(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header, void *arg); extern int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg); @@ -137,6 +138,7 @@ extern int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); #else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } +static inline void intel_iommu_shutdown(void) { } #define dmar_parse_one_rmrr dmar_res_noop #define dmar_parse_one_atsr dmar_res_noop -- cgit v1.2.3 From 83cba933a6db1dd4d7ac85170f99461fbc339eff Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 22 Oct 2019 17:22:19 +0000 Subject: mtd: spi-nor: Set default Quad Enable method for ISSI flashes Set the default Quad Enable method for ISSI flashes. Used for ISSI flashes (IS25WP256D-JMLE) that do not support SFDP tables and can not determine the Quad Enable method by parsing BFPT. Based on code originally written by Wesley Terpstra and/or Palmer Dabbelt https://github.com/riscv/riscv-linux/commit/c94e267766d62bc9a669611c3d0c8ed5ea26569b Signed-off-by: Sagar Shrikant Kadam [tudor.ambarus@microchip.com: - rebase, split and adapt for latest spi-nor/next, - use PMC CFI ID for ISSI. According to JEP106BA, "Programmable Micro Corp" changed its name to Integrated Silicon Solution (ISSI)] Signed-off-by: Tudor Ambarus Reviewed-by: Vignesh Raghavendra --- include/linux/mtd/spi-nor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 9eae35c60bce..5a4623fc586b 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -22,6 +22,7 @@ #define SNOR_MFR_INTEL CFI_MFR_INTEL #define SNOR_MFR_ST CFI_MFR_ST /* ST Micro */ #define SNOR_MFR_MICRON CFI_MFR_MICRON /* Micron */ +#define SNOR_MFR_ISSI CFI_MFR_PMC #define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX #define SNOR_MFR_SPANSION CFI_MFR_AMD #define SNOR_MFR_SST CFI_MFR_SST -- cgit v1.2.3 From cc9defcbb8fae52810f7795b039223edae51ef95 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:24 +0000 Subject: net/mlx5: Handle "enable_roce" devlink param Register "enable_roce" param, default value is RoCE enabled. Current configuration is stored on mlx5_core_dev and exposed to user through the cmode runtime devlink param. Changing configuration requires changing the cmode driverinit devlink param and calling devlink reload. Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7b4801e96feb..1884513aac90 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1191,4 +1191,15 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; +static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + + devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + return val.vbool; +} + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 7be5f90f689af5abb6b16755e212f76ed97a20dd Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Tue, 1 Oct 2019 11:00:00 -0500 Subject: i2c: pxa: remove unused i2c-slave APIs With the i2c-pxa driver migrated to the standard i2c-slave APIs, the custom APIs and structures are no longer needed or used. Remove them. Signed-off-by: Patrick Williams Signed-off-by: Wolfram Sang --- include/linux/i2c-pxa.h | 18 ------------------ include/linux/platform_data/i2c-pxa.h | 4 ---- 2 files changed, 22 deletions(-) delete mode 100644 include/linux/i2c-pxa.h (limited to 'include/linux') diff --git a/include/linux/i2c-pxa.h b/include/linux/i2c-pxa.h deleted file mode 100644 index a897e2b507b6..000000000000 --- a/include/linux/i2c-pxa.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_I2C_ALGO_PXA_H -#define _LINUX_I2C_ALGO_PXA_H - -typedef enum i2c_slave_event_e { - I2C_SLAVE_EVENT_START_READ, - I2C_SLAVE_EVENT_START_WRITE, - I2C_SLAVE_EVENT_STOP -} i2c_slave_event_t; - -struct i2c_slave_client { - void *data; - void (*event)(void *ptr, i2c_slave_event_t event); - int (*read) (void *ptr); - void (*write)(void *ptr, unsigned int val); -}; - -#endif /* _LINUX_I2C_ALGO_PXA_H */ diff --git a/include/linux/platform_data/i2c-pxa.h b/include/linux/platform_data/i2c-pxa.h index cb290092599c..6a9b28399b39 100644 --- a/include/linux/platform_data/i2c-pxa.h +++ b/include/linux/platform_data/i2c-pxa.h @@ -55,11 +55,7 @@ */ #define I2C_ISR_INIT 0x7FF /* status register init */ -struct i2c_slave_client; - struct i2c_pxa_platform_data { - unsigned int slave_addr; - struct i2c_slave_client *slave; unsigned int class; unsigned int use_pio :1; unsigned int fast_mode :1; -- cgit v1.2.3 From c1d51f684c72b5eb2aecbbd47be3a2977a2dc903 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Nov 2019 15:25:12 +0100 Subject: cpuidle: Use nanoseconds as the unit of time Currently, the cpuidle subsystem uses microseconds as the unit of time which (among other things) causes the idle loop to incur some integer division overhead for no clear benefit. In order to allow cpuidle to measure time in nanoseconds, add two new fields, exit_latency_ns and target_residency_ns, to represent the exit latency and target residency of an idle state in nanoseconds, respectively, to struct cpuidle_state and initialize them with the help of the corresponding values in microseconds provided by drivers. Additionally, change cpuidle_governor_latency_req() to return the idle state exit latency constraint in nanoseconds. Also meeasure idle state residency (last_residency_ns in struct cpuidle_device and time_ns in struct cpuidle_driver) in nanoseconds and update the cpuidle core and governors accordingly. However, the menu governor still computes typical intervals in microseconds to avoid integer overflows. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Doug Smythies Tested-by: Doug Smythies --- include/linux/cpuidle.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d23a3b1ddcf6..22602747f468 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -35,7 +35,7 @@ struct cpuidle_driver; struct cpuidle_state_usage { unsigned long long disable; unsigned long long usage; - unsigned long long time; /* in US */ + u64 time_ns; unsigned long long above; /* Number of times it's been too deep */ unsigned long long below; /* Number of times it's been too shallow */ #ifdef CONFIG_SUSPEND @@ -48,6 +48,8 @@ struct cpuidle_state { char name[CPUIDLE_NAME_LEN]; char desc[CPUIDLE_DESC_LEN]; + u64 exit_latency_ns; + u64 target_residency_ns; unsigned int flags; unsigned int exit_latency; /* in US */ int power_usage; /* in mW */ @@ -89,7 +91,7 @@ struct cpuidle_device { ktime_t next_hrtimer; int last_state_idx; - int last_residency; + u64 last_residency_ns; u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; @@ -263,7 +265,7 @@ struct cpuidle_governor { #ifdef CONFIG_CPU_IDLE extern int cpuidle_register_governor(struct cpuidle_governor *gov); -extern int cpuidle_governor_latency_req(unsigned int cpu); +extern s64 cpuidle_governor_latency_req(unsigned int cpu); #else static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} -- cgit v1.2.3 From 50ec88120ea16cf8b9aabf8422c364166ce3ee17 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Oct 2019 19:20:39 +0300 Subject: can: mcp251x: get rid of legacy platform data Instead of using legacy platform data, switch to use device properties. For clock frequency we are using well established clock-frequency property. Users, two for now, are also converted here. Cc: Daniel Mack Cc: Haojian Zhuang Cc: Robert Jarzmik Cc: Russell King Signed-off-by: Andy Shevchenko Signed-off-by: Marc Kleine-Budde --- include/linux/can/platform/mcp251x.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/can/platform/mcp251x.h (limited to 'include/linux') diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h deleted file mode 100644 index 9e5ac27fb6c1..000000000000 --- a/include/linux/can/platform/mcp251x.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CAN_PLATFORM_MCP251X_H -#define _CAN_PLATFORM_MCP251X_H - -/* - * - * CAN bus driver for Microchip 251x CAN Controller with SPI Interface - * - */ - -#include - -/* - * struct mcp251x_platform_data - MCP251X SPI CAN controller platform data - * @oscillator_frequency: - oscillator frequency in Hz - */ - -struct mcp251x_platform_data { - unsigned long oscillator_frequency; -}; - -#endif /* !_CAN_PLATFORM_MCP251X_H */ -- cgit v1.2.3 From 61d2350615c2c42f7af65d9a575f5dbf9738a10e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 7 Oct 2019 13:36:58 +0200 Subject: can: rx-offload: can_rx_offload_reset(): remove no-op function This patch removes the function can_rx_offload_reset(), as it does nothing. If we ever need this function, add it back again. Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 01219f2902bf..fc75e9a7ad2f 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -44,7 +44,6 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); -void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); -- cgit v1.2.3 From 4e9c9484b085dbba60b299182dd490eaeb84d18a Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 12 Jul 2019 08:02:38 +0000 Subject: can: rx-offload: Prepare for CAN FD support The skbs for classic CAN and CAN FD frames are allocated with seperate functions: alloc_can_skb() and alloc_canfd_skb(). In order to support CAN FD frames via the rx-offload helper, the driver itself has to allocate the skb (depending whether it received a classic CAN or CAN FD frame), as the rx-offload helper cannot know which kind of CAN frame the driver has received. This patch moves the allocation of the skb into the struct can_rx_offload::mailbox_read callbacks of the the flexcan and ti_hecc driver and adjusts the rx-offload helper accordingly. Signed-off-by: Joakim Zhang Signed-off-by: Marc Kleine-Budde --- include/linux/can/rx-offload.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index fc75e9a7ad2f..1b78a0cfb615 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -15,9 +15,9 @@ struct can_rx_offload { struct net_device *dev; - unsigned int (*mailbox_read)(struct can_rx_offload *offload, - struct can_frame *cf, - u32 *timestamp, unsigned int mb); + struct sk_buff *(*mailbox_read)(struct can_rx_offload *offload, + unsigned int mb, u32 *timestamp, + bool drop); struct sk_buff_head skb_queue; u32 skb_queue_len_max; -- cgit v1.2.3 From 4238fad366a660cbc6499ca1ea4be42bd4d1ac5b Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Wed, 30 Oct 2019 23:31:27 -0400 Subject: powerpc/ima: Add support to initialize ima policy rules PowerNV systems use a Linux-based bootloader, which rely on the IMA subsystem to enforce different secure boot modes. Since the verification policy may differ based on the secure boot mode of the system, the policies must be defined at runtime. This patch implements arch-specific support to define IMA policy rules based on the runtime secure boot mode of the system. This patch provides arch-specific IMA policies if PPC_SECURE_BOOT config is enabled. Signed-off-by: Nayna Jain Signed-off-by: Mimi Zohar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1572492694-6520-3-git-send-email-zohar@linux.ibm.com --- include/linux/ima.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 1c37f17f7203..6d904754d858 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -29,7 +29,8 @@ extern void ima_kexec_cmdline(const void *buf, int size); extern void ima_add_kexec_buffer(struct kimage *image); #endif -#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) +#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) \ + || defined(CONFIG_PPC_SECURE_BOOT) extern bool arch_ima_get_secureboot(void); extern const char * const *arch_get_ima_policy(void); #else -- cgit v1.2.3 From 39a963b457b5c6cbbdc70441c9d496e39d151582 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Tue, 1 Oct 2019 19:37:18 -0400 Subject: sysfs: Fixes __BIN_ATTR_WO() macro This patch fixes the size and write parameter for the macro __BIN_ATTR_WO(). Fixes: 7f905761e15a8 ("sysfs: add BIN_ATTR_WO() macro") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1569973038-2710-1-git-send-email-nayna@linux.ibm.com --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 5420817ed317..fa7ee503fb76 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -196,9 +196,9 @@ struct bin_attribute { .size = _size, \ } -#define __BIN_ATTR_WO(_name) { \ +#define __BIN_ATTR_WO(_name, _size) { \ .attr = { .name = __stringify(_name), .mode = 0200 }, \ - .store = _name##_store, \ + .write = _name##_write, \ .size = _size, \ } -- cgit v1.2.3 From 3bbc53f4ae1686b501d92d4a5fd400f4958c8a98 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 7 Nov 2019 10:19:24 +0100 Subject: hrtimer: Remove the comment about not used HRTIMER_SOFTIRQ The softirq `HRTIMER_SOFTIRQ' was not used since commit c6eb3f70d448 ("hrtimer: Get rid of hrtimer softirq"). But it got used again, beginning with commit 5da70160462e ("hrtimer: Implement support for softirq based hrtimers"), which did not remove the comment. Remove it now. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20191107091924.13410-1-bigeasy@linutronix.de --- include/linux/interrupt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 89fc59dab57d..963c3c695784 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -520,8 +520,7 @@ enum IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, - HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the - numbering. Sigh! */ + HRTIMER_SOFTIRQ, RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS -- cgit v1.2.3 From e23f568aa63f64cd6b355094224cc9356c0f696b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:29 -0800 Subject: kernfs: fix ino wrap-around detection When the 32bit ino wraps around, kernfs increments the generation number to distinguish reused ino instances. The wrap-around detection tests whether the allocated ino is lower than what the cursor but the cursor is pointing to the next ino to allocate so the condition never triggers. Fix it by remembering the last ino and comparing against that. Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman Fixes: 4a3ef68acacf ("kernfs: implement i_generation") Cc: Namhyung Kim Cc: stable@vger.kernel.org # v4.14+ --- include/linux/kernfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 936b61bd504e..f797ccc650e7 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -187,6 +187,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct idr ino_idr; + u32 last_ino; u32 next_generation; struct kernfs_syscall_ops *syscall_ops; -- cgit v1.2.3 From 67c0496e87d193b8356d2af49ab95e8a1b954b3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:30 -0800 Subject: kernfs: convert kernfs_node->id from union kernfs_node_id to u64 kernfs_node->id is currently a union kernfs_node_id which represents either a 32bit (ino, gen) pair or u64 value. I can't see much value in the usage of the union - all that's needed is a 64bit ID which the current code is already limited to. Using a union makes the code unnecessarily complicated and prevents using 64bit ino without adding practical benefits. This patch drops union kernfs_node_id and makes kernfs_node->id a u64. ino is stored in the lower 32bits and gen upper. Accessors - kernfs[_id]_ino() and kernfs[_id]_gen() - are added to retrieve the ino and gen. This simplifies ID handling less cumbersome and will allow using 64bit inos on supported archs. This patch doesn't make any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman Cc: Namhyung Kim Cc: Jens Axboe Cc: Alexei Starovoitov --- include/linux/cgroup.h | 17 ++++++++--------- include/linux/kernfs.h | 45 +++++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f6b048902d6c..815fff49d555 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -616,7 +616,7 @@ static inline bool cgroup_is_populated(struct cgroup *cgrp) /* returns ino associated with a cgroup */ static inline ino_t cgroup_ino(struct cgroup *cgrp) { - return cgrp->kn->id.ino; + return kernfs_ino(cgrp->kn); } /* cft/css accessors for cftype->write() operation */ @@ -687,13 +687,12 @@ static inline void cgroup_kthread_ready(void) current->no_cgroup_migration = 0; } -static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp) +static inline u64 cgroup_get_kernfs_id(struct cgroup *cgrp) { - return &cgrp->kn->id; + return cgrp->kn->id; } -void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, - char *buf, size_t buflen); +void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -718,9 +717,9 @@ static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} -static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp) +static inline union u64 cgroup_get_kernfs_id(struct cgroup *cgrp) { - return NULL; + return 0; } static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) @@ -739,8 +738,8 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, return true; } -static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, - char *buf, size_t buflen) {} +static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) +{} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index f797ccc650e7..b2fc5c8ef6d9 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -104,21 +104,6 @@ struct kernfs_elem_attr { struct kernfs_node *notify_next; /* for kernfs_notify() */ }; -/* represent a kernfs node */ -union kernfs_node_id { - struct { - /* - * blktrace will export this struct as a simplified 'struct - * fid' (which is a big data struction), so userspace can use - * it to find kernfs node. The layout must match the first two - * fields of 'struct fid' exactly. - */ - u32 ino; - u32 generation; - }; - u64 id; -}; - /* * kernfs_node - the building block of kernfs hierarchy. Each and every * kernfs node is represented by single kernfs_node. Most fields are @@ -155,7 +140,12 @@ struct kernfs_node { void *priv; - union kernfs_node_id id; + /* + * 64bit unique ID. Lower 32bits carry the inode number and lower + * generation. + */ + u64 id; + unsigned short flags; umode_t mode; struct kernfs_iattrs *iattr; @@ -292,6 +282,26 @@ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) return kn->flags & KERNFS_TYPE_MASK; } +static inline ino_t kernfs_id_ino(u64 id) +{ + return (u32)id; +} + +static inline u32 kernfs_id_gen(u64 id) +{ + return id >> 32; +} + +static inline ino_t kernfs_ino(struct kernfs_node *kn) +{ + return kernfs_id_ino(kn->id); +} + +static inline ino_t kernfs_gen(struct kernfs_node *kn) +{ + return kernfs_id_gen(kn->id); +} + /** * kernfs_enable_ns - enable namespace under a directory * @kn: directory of interest, should be empty @@ -383,8 +393,7 @@ void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); -struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root, - const union kernfs_node_id *id); +struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root, u64 id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) -- cgit v1.2.3 From fe0f726c9fb626b1092a9ea3bf75f57f2eed676e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:30 -0800 Subject: kernfs: combine ino/id lookup functions into kernfs_find_and_get_node_by_id() kernfs_find_and_get_node_by_ino() looks the kernfs_node matching the specified ino. On top of that, kernfs_get_node_by_id() and kernfs_fh_get_inode() implement full ID matching by testing the rest of ID. On surface, confusingly, the two are slightly different in that the latter uses 0 gen as wildcard while the former doesn't - does it mean that the latter can't uniquely identify inodes w/ 0 gen? In practice, this is a distinction without a difference because generation number starts at 1. There are no actual IDs with 0 gen, so it can always safely used as wildcard. Let's simplify the code by renaming kernfs_find_and_get_node_by_ino() to kernfs_find_and_get_node_by_id(), moving all lookup logics into it, and removing now unnecessary kernfs_get_node_by_id(). Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman --- include/linux/kernfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b2fc5c8ef6d9..38267cc9420c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -393,7 +393,8 @@ void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); -struct kernfs_node *kernfs_get_node_by_id(struct kernfs_root *root, u64 id); +struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, + u64 id); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) -- cgit v1.2.3 From 33c5ac9175195c36a0b7005aaf503a2e81f117a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:30 -0800 Subject: kernfs: implement custom exportfs ops and fid type The current kernfs exportfs implementation uses the generic_fh_*() helpers and FILEID_INO32_GEN[_PARENT] which limits ino to 32bits. Let's implement custom exportfs operations and fid type to remove the restriction. * FILEID_KERNFS is a single u64 value whose content is kernfs_node->id. This is the only native fid type. * For backward compatibility with blk_log_action() path which exposes (ino,gen) pairs which userland assembles into FILEID_INO32_GEN keys, combine the generic keys into 64bit IDs in the same order. Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman Cc: Namhyung Kim --- include/linux/exportfs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index cf6571fc9c01..d896b8657085 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -104,6 +104,11 @@ enum fid_type { */ FILEID_LUSTRE = 0x97, + /* + * 64 bit unique kernfs id + */ + FILEID_KERNFS = 0xfe, + /* * Filesystems must not use 0xff file ID. */ -- cgit v1.2.3 From 40430452fd5da1509177ac597b394614cd3a121f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:30 -0800 Subject: kernfs: use 64bit inos if ino_t is 64bit Each kernfs_node is identified with a 64bit ID. The low 32bit is exposed as ino and the high gen. While this already allows using inos as keys by looking up with wildcard generation number of 0, it's adding unnecessary complications for 64bit ino archs which can directly use kernfs_node IDs as inos to uniquely identify each cgroup instance. This patch exposes IDs directly as inos on 64bit ino archs. The conversion is mostly straight-forward. * 32bit ino archs behave the same as before. 64bit ino archs now use the whole 64bit ID as ino and the generation number is fixed at 1. * 64bit inos still use the same idr allocator which gurantees that the lower 32bits identify the current live instance uniquely and the high 32bits are incremented whenever the low bits wrap. As the upper 32bits are no longer used as gen and we don't wanna start ino allocation with 33rd bit set, the initial value for highbits allocation is changed to 0 on 64bit ino archs. * blktrace exposes two 32bit numbers - (INO,GEN) pair - to identify the issuing cgroup. Userland builds FILEID_INO32_GEN fids from these numbers to look up the cgroups. To remain compatible with the behavior, always output (LOW32,HIGH32) which will be constructed back to the original 64bit ID by __kernfs_fh_to_dentry(). Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman Cc: Namhyung Kim --- include/linux/kernfs.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 38267cc9420c..dded2e5a9f42 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -141,8 +141,8 @@ struct kernfs_node { void *priv; /* - * 64bit unique ID. Lower 32bits carry the inode number and lower - * generation. + * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, + * the low 32bits are ino and upper generation. */ u64 id; @@ -177,8 +177,8 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct idr ino_idr; - u32 last_ino; - u32 next_generation; + u32 last_id_lowbits; + u32 id_highbits; struct kernfs_syscall_ops *syscall_ops; /* list of kernfs_super_info of this root, protected by kernfs_mutex */ @@ -284,12 +284,20 @@ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) static inline ino_t kernfs_id_ino(u64 id) { - return (u32)id; + /* id is ino if ino_t is 64bit; otherwise, low 32bits */ + if (sizeof(ino_t) >= sizeof(u64)) + return id; + else + return (u32)id; } static inline u32 kernfs_id_gen(u64 id) { - return id >> 32; + /* gen is fixed at 1 if ino_t is 64bit; otherwise, high 32bits */ + if (sizeof(ino_t) >= sizeof(u64)) + return 1; + else + return id >> 32; } static inline ino_t kernfs_ino(struct kernfs_node *kn) -- cgit v1.2.3 From 743210386c0354a2f8ef3d697353c7d8477fa81d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Nov 2019 15:54:30 -0800 Subject: cgroup: use cgrp->kn->id as the cgroup ID cgroup ID is currently allocated using a dedicated per-hierarchy idr and used internally and exposed through tracepoints and bpf. This is confusing because there are tracepoints and other interfaces which use the cgroupfs ino as IDs. The preceding changes made kn->id exposed as ino as 64bit ino on supported archs or ino+gen (low 32bits as ino, high gen). There's no reason for cgroup to use different IDs. The kernfs IDs are unique and userland can easily discover them and map them back to paths using standard file operations. This patch replaces cgroup IDs with kernfs IDs. * cgroup_id() is added and all cgroup ID users are converted to use it. * kernfs_node creation is moved to earlier during cgroup init so that cgroup_id() is available during init. * While at it, s/cgroup/cgrp/ in psi helpers for consistency. * Fallback ID value is changed to 1 to be consistent with root cgroup ID. Signed-off-by: Tejun Heo Reviewed-by: Greg Kroah-Hartman Cc: Namhyung Kim --- include/linux/cgroup-defs.h | 17 ++--------------- include/linux/cgroup.h | 17 +++++++---------- 2 files changed, 9 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4904b1ebd1ff..63097cb243cb 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -354,16 +354,6 @@ struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ - /* - * idr allocated in-hierarchy ID. - * - * ID 0 is not used, the ID of the root cgroup is always 1, and a - * new cgroup will be assigned with a smallest available ID. - * - * Allocating/Removing ID must be protected by cgroup_mutex. - */ - int id; - /* * The depth this cgroup is at. The root is at depth zero and each * step down the hierarchy increments the level. This along with @@ -488,7 +478,7 @@ struct cgroup { struct cgroup_freezer_state freezer; /* ids of the ancestors at each level including self */ - int ancestor_ids[]; + u64 ancestor_ids[]; }; /* @@ -509,7 +499,7 @@ struct cgroup_root { struct cgroup cgrp; /* for cgrp->ancestor_ids[0] */ - int cgrp_ancestor_id_storage; + u64 cgrp_ancestor_id_storage; /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; @@ -520,9 +510,6 @@ struct cgroup_root { /* Hierarchy-specific flags */ unsigned int flags; - /* IDs for cgroups in this hierarchy */ - struct idr cgroup_idr; - /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 815fff49d555..d7ddebd0cdec 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -304,6 +304,11 @@ void css_task_iter_end(struct css_task_iter *it); * Inline functions. */ +static inline u64 cgroup_id(struct cgroup *cgrp) +{ + return cgrp->kn->id; +} + /** * css_get - obtain a reference on the specified css * @css: target css @@ -565,7 +570,7 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, { if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) return false; - return cgrp->ancestor_ids[ancestor->level] == ancestor->id; + return cgrp->ancestor_ids[ancestor->level] == cgroup_id(ancestor); } /** @@ -687,17 +692,13 @@ static inline void cgroup_kthread_ready(void) current->no_cgroup_migration = 0; } -static inline u64 cgroup_get_kernfs_id(struct cgroup *cgrp) -{ - return cgrp->kn->id; -} - void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; struct cgroup; +static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, @@ -717,10 +718,6 @@ static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} -static inline union u64 cgroup_get_kernfs_id(struct cgroup *cgrp) -{ - return 0; -} static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) { -- cgit v1.2.3 From d05a0201969045f4c488f7cf1d024089949a68b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2019 16:34:22 +0100 Subject: sunrpc: remove __KERNEL__ ifdefs Remove the __KERNEL__ ifdefs from the non-UAPI sunrpc headers, as those can't be included from user space programs. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/auth.h | 3 --- include/linux/sunrpc/auth_gss.h | 2 -- include/linux/sunrpc/clnt.h | 3 --- include/linux/sunrpc/gss_api.h | 2 -- include/linux/sunrpc/gss_err.h | 3 --- include/linux/sunrpc/msg_prot.h | 3 --- include/linux/sunrpc/rpc_pipe_fs.h | 3 --- include/linux/sunrpc/svcauth.h | 4 ---- include/linux/sunrpc/svcauth_gss.h | 2 -- include/linux/sunrpc/xdr.h | 3 --- include/linux/sunrpc/xprt.h | 4 ---- include/linux/sunrpc/xprtsock.h | 4 ---- 12 files changed, 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 5f9076fdb090..e9ec742796e7 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -10,8 +10,6 @@ #ifndef _LINUX_SUNRPC_AUTH_H #define _LINUX_SUNRPC_AUTH_H -#ifdef __KERNEL__ - #include #include #include @@ -194,5 +192,4 @@ struct rpc_cred *get_rpccred(struct rpc_cred *cred) return NULL; } -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_H */ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 30427b729070..43e481aa347a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -13,7 +13,6 @@ #ifndef _LINUX_SUNRPC_AUTH_GSS_H #define _LINUX_SUNRPC_AUTH_GSS_H -#ifdef __KERNEL__ #include #include #include @@ -90,6 +89,5 @@ struct gss_cred { unsigned long gc_upcall_timestamp; }; -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index abc63bd1be2b..64bffcb7142b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -109,8 +109,6 @@ struct rpc_procinfo { const char * p_name; /* name of procedure */ }; -#ifdef __KERNEL__ - struct rpc_create_args { struct net *net; int protocol; @@ -237,5 +235,4 @@ static inline int rpc_reply_expected(struct rpc_task *task) (task->tk_msg.rpc_proc->p_decode != NULL); } -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 5ac5db4d295f..bd691e08be3b 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -13,7 +13,6 @@ #ifndef _LINUX_SUNRPC_GSS_API_H #define _LINUX_SUNRPC_GSS_API_H -#ifdef __KERNEL__ #include #include #include @@ -160,6 +159,5 @@ struct gss_api_mech * gss_mech_get(struct gss_api_mech *); * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_GSS_API_H */ diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h index a6807867bd21..b73c329c83f2 100644 --- a/include/linux/sunrpc/gss_err.h +++ b/include/linux/sunrpc/gss_err.h @@ -34,8 +34,6 @@ #ifndef _LINUX_SUNRPC_GSS_ERR_H #define _LINUX_SUNRPC_GSS_ERR_H -#ifdef __KERNEL__ - typedef unsigned int OM_uint32; /* @@ -163,5 +161,4 @@ typedef unsigned int OM_uint32; /* XXXX This is a necessary evil until the spec is fixed */ #define GSS_S_CRED_UNAVAIL GSS_S_FAILURE -#endif /* __KERNEL__ */ #endif /* __LINUX_SUNRPC_GSS_ERR_H */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 4722b28ec36a..bea40d9f03a1 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -8,8 +8,6 @@ #ifndef _LINUX_SUNRPC_MSGPROT_H_ #define _LINUX_SUNRPC_MSGPROT_H_ -#ifdef __KERNEL__ /* user programs should get these from the rpc header files */ - #define RPC_VERSION 2 /* size of an XDR encoding unit in bytes, i.e. 32bit */ @@ -217,5 +215,4 @@ typedef __be32 rpc_fraghdr; /* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... */ #define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index e90b9bd99ded..cd188a527d16 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -2,8 +2,6 @@ #ifndef _LINUX_SUNRPC_RPC_PIPE_FS_H #define _LINUX_SUNRPC_RPC_PIPE_FS_H -#ifdef __KERNEL__ - #include struct rpc_pipe_dir_head { @@ -133,4 +131,3 @@ extern void unregister_rpc_pipefs(void); extern bool gssd_running(struct net *net); #endif -#endif diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 3e53a6e2ada7..b0003866a249 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -10,8 +10,6 @@ #ifndef _LINUX_SUNRPC_SVCAUTH_H_ #define _LINUX_SUNRPC_SVCAUTH_H_ -#ifdef __KERNEL__ - #include #include #include @@ -185,6 +183,4 @@ static inline unsigned long hash_mem(char const *buf, int length, int bits) return full_name_hash(NULL, buf, length) >> (32 - bits); } -#endif /* __KERNEL__ */ - #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index a4528b26c8aa..ca39a388dc22 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -9,7 +9,6 @@ #ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H #define _LINUX_SUNRPC_SVCAUTH_GSS_H -#ifdef __KERNEL__ #include #include #include @@ -24,5 +23,4 @@ void gss_svc_shutdown_net(struct net *net); int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); u32 svcauth_gss_flavor(struct auth_domain *dom); -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f33e5013bdfb..b41f34977995 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -11,8 +11,6 @@ #ifndef _SUNRPC_XDR_H_ #define _SUNRPC_XDR_H_ -#ifdef __KERNEL__ - #include #include #include @@ -552,6 +550,5 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, *array = be32_to_cpup(p); return retval; } -#endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d783e15ba898..874205227778 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -19,8 +19,6 @@ #include #include -#ifdef __KERNEL__ - #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) @@ -505,6 +503,4 @@ static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) } #endif -#endif /* __KERNEL__*/ - #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 7638dbe7bc50..30acd67d1627 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -8,8 +8,6 @@ #ifndef _LINUX_SUNRPC_XPRTSOCK_H #define _LINUX_SUNRPC_XPRTSOCK_H -#ifdef __KERNEL__ - int init_socket_xprt(void); void cleanup_socket_xprt(void); @@ -90,6 +88,4 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_PENDING (6) #define XPRT_SOCK_WAKE_DISCONNECT (7) -#endif /* __KERNEL__ */ - #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ -- cgit v1.2.3 From fb7dd0a1ba8690527c2394c6c55f909aa87d8f44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2019 16:34:23 +0100 Subject: lockd: remove __KERNEL__ ifdefs Remove the __KERNEL__ ifdefs from the non-UAPI sunrpc headers, as those can't be included from user space programs. Signed-off-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/lockd/debug.h | 4 ---- include/linux/lockd/lockd.h | 4 ---- 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h index e536c579827f..eede2ab5246f 100644 --- a/include/linux/lockd/debug.h +++ b/include/linux/lockd/debug.h @@ -10,8 +10,6 @@ #ifndef LINUX_LOCKD_DEBUG_H #define LINUX_LOCKD_DEBUG_H -#ifdef __KERNEL__ - #include /* @@ -25,8 +23,6 @@ # define ifdebug(flag) if (0) #endif -#endif /* __KERNEL__ */ - /* * Debug flags */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index d294dde9e546..666f5f310a04 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -10,8 +10,6 @@ #ifndef LINUX_LOCKD_LOCKD_H #define LINUX_LOCKD_LOCKD_H -#ifdef __KERNEL__ - #include #include #include @@ -373,6 +371,4 @@ static inline int nlm_compare_locks(const struct file_lock *fl1, extern const struct lock_manager_operations nlmsvc_lock_operations; -#endif /* __KERNEL__ */ - #endif /* LINUX_LOCKD_LOCKD_H */ -- cgit v1.2.3 From 6c0867022352027409f5a9fee1d3c6923f9e083e Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 12 Nov 2019 11:35:00 +0000 Subject: net: sfp: fix sfp_bus_add_upstream() warning When building with SFP disabled, the stub for sfp_bus_add_upstream() missed "inline". Add it. Fixes: 727b3668b730 ("net: sfp: rework upstream interface") Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/sfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index c8464de7cff5..3b35efd85bb1 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -563,8 +563,8 @@ static inline struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) return NULL; } -static int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, - const struct sfp_upstream_ops *ops) +static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, + const struct sfp_upstream_ops *ops) { return 0; } -- cgit v1.2.3 From 15d0b22c01e6320241fe4d570e02de2935b842bf Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Mon, 2 Sep 2019 07:27:34 -0700 Subject: tpm: provide a way to override the chip returned durations Patch adds method ->update_durations to override returned durations in case TPM chip misbehaves for TPM 1.2 drivers. Cc: Peter Huewe Cc: Jason Gunthorpe Signed-off-by: Alexey Klimov Signed-off-by: Jerry Snitselaar Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen (!update_durations path) Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 53c0ea9ec9df..bb1d1ac7081d 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -67,6 +67,8 @@ struct tpm_class_ops { u8 (*status) (struct tpm_chip *chip); void (*update_timeouts)(struct tpm_chip *chip, unsigned long *timeout_cap); + void (*update_durations)(struct tpm_chip *chip, + unsigned long *duration_cap); int (*go_idle)(struct tpm_chip *chip); int (*cmd_ready)(struct tpm_chip *chip); int (*request_locality)(struct tpm_chip *chip, int loc); -- cgit v1.2.3 From 74edff2d74c64ca5977a57efb5c238c8f5318ba9 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Wed, 16 Oct 2019 10:44:52 +0530 Subject: tpm: Move tpm_buf code to include/linux/ Move tpm_buf code to common include/linux/tpm.h header so that it can be reused via other subsystems like trusted keys etc. Also rename trusted keys and asymmetric keys usage of TPM 1.x buffer implementation to tpm1_buf to avoid any compilation errors. Suggested-by: Jarkko Sakkinen Signed-off-by: Sumit Garg Reviewed-by: Jerry Snitselaar Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index bb1d1ac7081d..c78119fcac7f 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #define TPM_DIGEST_SIZE 20 /* Max TPM v1.2 PCR size */ @@ -163,6 +164,217 @@ struct tpm_chip { int locality; }; +#define TPM_HEADER_SIZE 10 + +enum tpm2_const { + TPM2_PLATFORM_PCR = 24, + TPM2_PCR_SELECT_MIN = ((TPM2_PLATFORM_PCR + 7) / 8), +}; + +enum tpm2_timeouts { + TPM2_TIMEOUT_A = 750, + TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_C = 200, + TPM2_TIMEOUT_D = 30, + TPM2_DURATION_SHORT = 20, + TPM2_DURATION_MEDIUM = 750, + TPM2_DURATION_LONG = 2000, + TPM2_DURATION_LONG_LONG = 300000, + TPM2_DURATION_DEFAULT = 120000, +}; + +enum tpm2_structures { + TPM2_ST_NO_SESSIONS = 0x8001, + TPM2_ST_SESSIONS = 0x8002, +}; + +/* Indicates from what layer of the software stack the error comes from */ +#define TSS2_RC_LAYER_SHIFT 16 +#define TSS2_RESMGR_TPM_RC_LAYER (11 << TSS2_RC_LAYER_SHIFT) + +enum tpm2_return_codes { + TPM2_RC_SUCCESS = 0x0000, + TPM2_RC_HASH = 0x0083, /* RC_FMT1 */ + TPM2_RC_HANDLE = 0x008B, + TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */ + TPM2_RC_FAILURE = 0x0101, + TPM2_RC_DISABLED = 0x0120, + TPM2_RC_COMMAND_CODE = 0x0143, + TPM2_RC_TESTING = 0x090A, /* RC_WARN */ + TPM2_RC_REFERENCE_H0 = 0x0910, + TPM2_RC_RETRY = 0x0922, +}; + +enum tpm2_command_codes { + TPM2_CC_FIRST = 0x011F, + TPM2_CC_HIERARCHY_CONTROL = 0x0121, + TPM2_CC_HIERARCHY_CHANGE_AUTH = 0x0129, + TPM2_CC_CREATE_PRIMARY = 0x0131, + TPM2_CC_SEQUENCE_COMPLETE = 0x013E, + TPM2_CC_SELF_TEST = 0x0143, + TPM2_CC_STARTUP = 0x0144, + TPM2_CC_SHUTDOWN = 0x0145, + TPM2_CC_NV_READ = 0x014E, + TPM2_CC_CREATE = 0x0153, + TPM2_CC_LOAD = 0x0157, + TPM2_CC_SEQUENCE_UPDATE = 0x015C, + TPM2_CC_UNSEAL = 0x015E, + TPM2_CC_CONTEXT_LOAD = 0x0161, + TPM2_CC_CONTEXT_SAVE = 0x0162, + TPM2_CC_FLUSH_CONTEXT = 0x0165, + TPM2_CC_VERIFY_SIGNATURE = 0x0177, + TPM2_CC_GET_CAPABILITY = 0x017A, + TPM2_CC_GET_RANDOM = 0x017B, + TPM2_CC_PCR_READ = 0x017E, + TPM2_CC_PCR_EXTEND = 0x0182, + TPM2_CC_EVENT_SEQUENCE_COMPLETE = 0x0185, + TPM2_CC_HASH_SEQUENCE_START = 0x0186, + TPM2_CC_CREATE_LOADED = 0x0191, + TPM2_CC_LAST = 0x0193, /* Spec 1.36 */ +}; + +enum tpm2_permanent_handles { + TPM2_RS_PW = 0x40000009, +}; + +enum tpm2_capabilities { + TPM2_CAP_HANDLES = 1, + TPM2_CAP_COMMANDS = 2, + TPM2_CAP_PCRS = 5, + TPM2_CAP_TPM_PROPERTIES = 6, +}; + +enum tpm2_properties { + TPM_PT_TOTAL_COMMANDS = 0x0129, +}; + +enum tpm2_startup_types { + TPM2_SU_CLEAR = 0x0000, + TPM2_SU_STATE = 0x0001, +}; + +enum tpm2_cc_attrs { + TPM2_CC_ATTR_CHANDLES = 25, + TPM2_CC_ATTR_RHANDLE = 28, +}; + +#define TPM_VID_INTEL 0x8086 +#define TPM_VID_WINBOND 0x1050 +#define TPM_VID_STM 0x104A + +enum tpm_chip_flags { + TPM_CHIP_FLAG_TPM2 = BIT(1), + TPM_CHIP_FLAG_IRQ = BIT(2), + TPM_CHIP_FLAG_VIRTUAL = BIT(3), + TPM_CHIP_FLAG_HAVE_TIMEOUTS = BIT(4), + TPM_CHIP_FLAG_ALWAYS_POWERED = BIT(5), + TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6), +}; + +#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) + +struct tpm_header { + __be16 tag; + __be32 length; + union { + __be32 ordinal; + __be32 return_code; + }; +} __packed; + +/* A string buffer type for constructing TPM commands. This is based on the + * ideas of string buffer code in security/keys/trusted.h but is heap based + * in order to keep the stack usage minimal. + */ + +enum tpm_buf_flags { + TPM_BUF_OVERFLOW = BIT(0), +}; + +struct tpm_buf { + unsigned int flags; + u8 *data; +}; + +static inline void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + + head->tag = cpu_to_be16(tag); + head->length = cpu_to_be32(sizeof(*head)); + head->ordinal = cpu_to_be32(ordinal); +} + +static inline int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal) +{ + buf->data = (u8 *)__get_free_page(GFP_KERNEL); + if (!buf->data) + return -ENOMEM; + + buf->flags = 0; + tpm_buf_reset(buf, tag, ordinal); + return 0; +} + +static inline void tpm_buf_destroy(struct tpm_buf *buf) +{ + free_page((unsigned long)buf->data); +} + +static inline u32 tpm_buf_length(struct tpm_buf *buf) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + + return be32_to_cpu(head->length); +} + +static inline u16 tpm_buf_tag(struct tpm_buf *buf) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + + return be16_to_cpu(head->tag); +} + +static inline void tpm_buf_append(struct tpm_buf *buf, + const unsigned char *new_data, + unsigned int new_len) +{ + struct tpm_header *head = (struct tpm_header *)buf->data; + u32 len = tpm_buf_length(buf); + + /* Return silently if overflow has already happened. */ + if (buf->flags & TPM_BUF_OVERFLOW) + return; + + if ((len + new_len) > PAGE_SIZE) { + WARN(1, "tpm_buf: overflow\n"); + buf->flags |= TPM_BUF_OVERFLOW; + return; + } + + memcpy(&buf->data[len], new_data, new_len); + head->length = cpu_to_be32(len + new_len); +} + +static inline void tpm_buf_append_u8(struct tpm_buf *buf, const u8 value) +{ + tpm_buf_append(buf, &value, 1); +} + +static inline void tpm_buf_append_u16(struct tpm_buf *buf, const u16 value) +{ + __be16 value2 = cpu_to_be16(value); + + tpm_buf_append(buf, (u8 *) &value2, 2); +} + +static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value) +{ + __be32 value2 = cpu_to_be32(value); + + tpm_buf_append(buf, (u8 *) &value2, 4); +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); -- cgit v1.2.3 From 2e19e10131a08dc65079c755fb6e8af936bfedbd Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Wed, 16 Oct 2019 10:44:55 +0530 Subject: KEYS: trusted: Move TPM2 trusted keys code Move TPM2 trusted keys code to trusted keys subsystem. The reason being it's better to consolidate all the trusted keys code to a single location so that it can be maintained sanely. Also, utilize existing tpm_send() exported API which wraps the internal tpm_transmit_cmd() API. Suggested-by: Jarkko Sakkinen Signed-off-by: Sumit Garg Reviewed-by: Jarkko Sakkinen Tested-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index c78119fcac7f..0d6e949ba315 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -296,6 +296,19 @@ struct tpm_buf { u8 *data; }; +enum tpm2_object_attributes { + TPM2_OA_USER_WITH_AUTH = BIT(6), +}; + +enum tpm2_session_attributes { + TPM2_SA_CONTINUE_SESSION = BIT(0), +}; + +struct tpm2_hash { + unsigned int crypto_id; + unsigned int tpm_id; +}; + static inline void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal) { struct tpm_header *head = (struct tpm_header *)buf->data; @@ -375,6 +388,11 @@ static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value) tpm_buf_append(buf, (u8 *) &value2, 4); } +static inline u32 tpm2_rc_value(u32 rc) +{ + return (rc & BIT(7)) ? rc & 0xff : rc; +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); @@ -384,12 +402,6 @@ extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, struct tpm_digest *digests); extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen); extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max); -extern int tpm_seal_trusted(struct tpm_chip *chip, - struct trusted_key_payload *payload, - struct trusted_key_options *options); -extern int tpm_unseal_trusted(struct tpm_chip *chip, - struct trusted_key_payload *payload, - struct trusted_key_options *options); extern struct tpm_chip *tpm_default_chip(void); #else static inline int tpm_is_tpm2(struct tpm_chip *chip) @@ -418,18 +430,6 @@ static inline int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max) return -ENODEV; } -static inline int tpm_seal_trusted(struct tpm_chip *chip, - struct trusted_key_payload *payload, - struct trusted_key_options *options) -{ - return -ENODEV; -} -static inline int tpm_unseal_trusted(struct tpm_chip *chip, - struct trusted_key_payload *payload, - struct trusted_key_options *options) -{ - return -ENODEV; -} static inline struct tpm_chip *tpm_default_chip(void) { return NULL; -- cgit v1.2.3 From 56f434f40f059eb3769d50b9c244a850096c3d6f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:18 -0400 Subject: mm/mmu_notifier: define the header pre-processor parts even if disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have KERNEL_HEADER_TEST all headers are generally compile tested, so relying on makefile tricks to avoid compiling code that depends on CONFIG_MMU_NOTIFIER is more annoying. Instead follow the usual pattern and provide most of the header with only the functions stubbed out when CONFIG_MMU_NOTIFIER is disabled. This ensures code compiles no matter what the config setting is. While here, struct mmu_notifier_mm is private to mmu_notifier.c, move it. Link: https://lore.kernel.org/r/20191112202231.3856-2-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/mmu_notifier.h | 46 ++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 1bd8e6a09a3c..12bd603d318c 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -7,8 +7,9 @@ #include #include +struct mmu_notifier_mm; struct mmu_notifier; -struct mmu_notifier_ops; +struct mmu_notifier_range; /** * enum mmu_notifier_event - reason for the mmu notifier callback @@ -40,36 +41,8 @@ enum mmu_notifier_event { MMU_NOTIFY_SOFT_DIRTY, }; -#ifdef CONFIG_MMU_NOTIFIER - -#ifdef CONFIG_LOCKDEP -extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; -#endif - -/* - * The mmu notifier_mm structure is allocated and installed in - * mm->mmu_notifier_mm inside the mm_take_all_locks() protected - * critical section and it's released only when mm_count reaches zero - * in mmdrop(). - */ -struct mmu_notifier_mm { - /* all mmu notifiers registerd in this mm are queued in this list */ - struct hlist_head list; - /* to serialize the list modifications and hlist_unhashed */ - spinlock_t lock; -}; - #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) -struct mmu_notifier_range { - struct vm_area_struct *vma; - struct mm_struct *mm; - unsigned long start; - unsigned long end; - unsigned flags; - enum mmu_notifier_event event; -}; - struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is @@ -249,6 +222,21 @@ struct mmu_notifier { unsigned int users; }; +#ifdef CONFIG_MMU_NOTIFIER + +#ifdef CONFIG_LOCKDEP +extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; +#endif + +struct mmu_notifier_range { + struct vm_area_struct *vma; + struct mm_struct *mm; + unsigned long start; + unsigned long end; + unsigned flags; + enum mmu_notifier_event event; +}; + static inline int mm_has_notifiers(struct mm_struct *mm) { return unlikely(mm->mmu_notifier_mm); -- cgit v1.2.3 From d41003513e61dd9d4974cb441d30b63650b85654 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Nov 2019 11:39:30 +0900 Subject: block: rework zone reporting Avoid the need to allocate a potentially large array of struct blk_zone in the block layer by switching the ->report_zones method interface to a callback model. Now the caller simply supplies a callback that is executed on each reported zone, and private data for it. Signed-off-by: Christoph Hellwig Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 15 +++++++-------- include/linux/device-mapper.h | 24 +++++++++++++++++++----- 2 files changed, 26 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a4f7abbdcf7..397bb9bc230b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -349,17 +349,16 @@ struct queue_limits { enum blk_zoned_model zoned; }; +typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, + void *data); + #ifdef CONFIG_BLK_DEV_ZONED -/* - * Maximum number of zones to report with a single report zones command. - */ -#define BLK_ZONED_REPORT_MAX_ZONES 8192U +#define BLK_ALL_ZONES ((unsigned int)-1) +int blkdev_report_zones(struct block_device *bdev, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data); extern unsigned int blkdev_nr_zones(struct block_device *bdev); -extern int blkdev_report_zones(struct block_device *bdev, - sector_t sector, struct blk_zone *zones, - unsigned int *nr_zones); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -1709,7 +1708,7 @@ struct block_device_operations { /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones); + unsigned int nr_zones, report_zones_cb cb, void *data); struct module *owner; const struct pr_ops *pr_ops; }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 399ad8632356..a164cc81b710 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -17,6 +17,7 @@ struct dm_dev; struct dm_target; struct dm_table; +struct dm_report_zones_args; struct mapped_device; struct bio_vec; @@ -93,9 +94,9 @@ typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); -typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector, - struct blk_zone *zones, - unsigned int *nr_zones); +typedef int (*dm_report_zones_fn) (struct dm_target *ti, + struct dm_report_zones_args *args, + unsigned int nr_zones); /* * These iteration functions are typically used to check (and combine) @@ -422,10 +423,23 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); -void dm_remap_zone_report(struct dm_target *ti, sector_t start, - struct blk_zone *zones, unsigned int *nr_zones); union map_info *dm_get_rq_mapinfo(struct request *rq); +#ifdef CONFIG_BLK_DEV_ZONED +struct dm_report_zones_args { + struct dm_target *tgt; + sector_t next_sector; + + void *orig_data; + report_zones_cb orig_cb; + unsigned int zone_idx; + + /* must be filled by ->report_zones before calling dm_report_zones_cb */ + sector_t start; +}; +int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data); +#endif /* CONFIG_BLK_DEV_ZONED */ + /* * Device mapper functions to parse and create devices specified by the * parameter "dm-mod.create=" -- cgit v1.2.3 From c29f74e0df7a02b8303bcdce93a7c0132d62577a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 12 Nov 2019 00:29:56 +0100 Subject: netfilter: nf_flow_table: hardware offload support This patch adds the dataplane hardware offload to the flowtable infrastructure. Three new flags represent the hardware state of this flow: * FLOW_OFFLOAD_HW: This flow entry resides in the hardware. * FLOW_OFFLOAD_HW_DYING: This flow entry has been scheduled to be remove from hardware. This might be triggered by either packet path (via TCP RST/FIN packet) or via aging. * FLOW_OFFLOAD_HW_DEAD: This flow entry has been already removed from the hardware, the software garbage collector can remove it from the software flowtable. This patch supports for: * IPv4 only. * Aging via FLOW_CLS_STATS, no packet and byte counter synchronization at this stage. This patch also adds the action callback that specifies how to convert the flow entry into the flow_rule object that is passed to the driver. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f857f01234f7..9e6fb8524d91 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -848,6 +848,7 @@ enum tc_setup_type { TC_SETUP_ROOT_QDISC, TC_SETUP_QDISC_GRED, TC_SETUP_QDISC_TAPRIO, + TC_SETUP_FT, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From a4faf00d994c40e64f656805ac375c65e324eefb Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 25 Oct 2019 17:08:33 +0300 Subject: perf/aux: Allow using AUX data in perf samples AUX data can be used to annotate perf events such as performance counters or tracepoints/breakpoints by including it in sample records when PERF_SAMPLE_AUX flag is set. Such samples would be instrumental in debugging and profiling by providing, for example, a history of instruction flow leading up to the event's overflow. The implementation makes use of grouping an AUX event with all the events that wish to take samples of the AUX data, such that the former is the group leader. The samplees should also specify the desired size of the AUX sample via attr.aux_sample_size. AUX capable PMUs need to explicitly add support for sampling, because it relies on a new callback to take a snapshot of the buffer without touching the event states. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: adrian.hunter@intel.com Cc: mathieu.poirier@linaro.org Link: https://lkml.kernel.org/r/20191025140835.53665-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 011dcbdbccc2..34c7c6910026 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -249,6 +249,8 @@ struct perf_event; #define PERF_PMU_CAP_NO_EXCLUDE 0x80 #define PERF_PMU_CAP_AUX_OUTPUT 0x100 +struct perf_output_handle; + /** * struct pmu - generic performance monitoring unit */ @@ -432,6 +434,19 @@ struct pmu { */ void (*free_aux) (void *aux); /* optional */ + /* + * Take a snapshot of the AUX buffer without touching the event + * state, so that preempting ->start()/->stop() callbacks does + * not interfere with their logic. Called in PMI context. + * + * Returns the size of AUX data copied to the output handle. + * + * Optional. + */ + long (*snapshot_aux) (struct perf_event *event, + struct perf_output_handle *handle, + unsigned long size); + /* * Validate address range filters: make sure the HW supports the * requested configuration and number of filters; return 0 if the @@ -973,6 +988,7 @@ struct perf_sample_data { u32 reserved; } cpu_entry; struct perf_callchain_entry *callchain; + u64 aux_size; /* * regs_user may point to task_pt_regs or to regs_user_copy, depending @@ -1362,6 +1378,9 @@ extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); +extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, + struct perf_output_handle *handle, + unsigned long from, unsigned long to); extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern u64 perf_swevent_set_period(struct perf_event *event); -- cgit v1.2.3 From cf25e24db61cc9df42c47485a2ec2bff4e9a3692 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Nov 2019 11:07:58 +0100 Subject: time: Rename tsk->real_start_time to ->start_boottime Since it stores CLOCK_BOOTTIME, not, as the name suggests, CLOCK_REALTIME, let's rename ->real_start_time to ->start_bootime. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..254128952eab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -857,7 +857,7 @@ struct task_struct { u64 start_time; /* Boot based time in nsecs: */ - u64 real_start_time; + u64 start_boottime; /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ unsigned long min_flt; -- cgit v1.2.3 From 2079fe6ea8cbd2fb2fbadba911f1eca6c362eb9b Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 15 Oct 2019 14:12:38 +0100 Subject: ARM: OMAP2+: SmartReflex: add omap_sr_pdata definition The omap_sr_pdata is not declared but is exported, so add a define for it to fix the following warning: arch/arm/mach-omap2/pdata-quirks.c:609:36: warning: symbol 'omap_sr_pdata' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: Rafael J. Wysocki --- include/linux/power/smartreflex.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h index d0b37e937037..971c9264179e 100644 --- a/include/linux/power/smartreflex.h +++ b/include/linux/power/smartreflex.h @@ -293,6 +293,9 @@ struct omap_sr_data { struct voltagedomain *voltdm; }; + +extern struct omap_sr_data omap_sr_pdata[OMAP_SR_NR]; + #ifdef CONFIG_POWER_AVS_OMAP /* Smartreflex module enable/disable interface */ -- cgit v1.2.3 From ca765a8cfe0c78bfa47b9d67121f4e342d4b4512 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 16 Oct 2019 15:16:03 +0200 Subject: PM / Domains: Introduce dev_pm_domain_start() For a subsystem/driver that either doesn't support runtime PM or makes use of pm_runtime_set_active() during ->probe(), may try to access its device when probing, even if it may not be fully powered on from the PM domain's point of view. This may be the case when the used PM domain is a genpd provider, that implements genpd's ->start|stop() device callbacks. There are cases where the subsystem/driver managed to avoid the above problem, simply by calling pm_runtime_enable() and pm_runtime_get_sync() during ->probe(). However, this approach comes with a drawback, especially if the subsystem/driver implements a ->runtime_resume() callback. More precisely, the subsystem/driver then needs to use a device flag, which is checked in its ->runtime_resume() callback, as to avoid powering on its resources the first time the callback is invoked. This is needed because the subsystem/driver has already powered on the resources for the device, during ->probe() and before it called pm_runtime_get_sync(). In a way to avoid this boilerplate code and the inefficient check for "if (first_time_suspend)" in the ->runtime_resume() callback for these subsystems/drivers, let's introduce and export a dev_pm_domain_start() function, that may be called during ->probe() instead. Moreover, let the dev_pm_domain_start() invoke an optional ->start() callback, added to the struct dev_pm_domain, as to allow a PM domain specific implementation. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 2 ++ include/linux/pm_domain.h | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 4c441be03079..e057d1fa2469 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -637,6 +637,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); * struct dev_pm_domain - power management domain representation. * * @ops: Power management operations associated with this domain. + * @start: Called when a user needs to start the device via the domain. * @detach: Called when removing a device from the domain. * @activate: Called before executing probe routines for bus types and drivers. * @sync: Called after successful driver probe. @@ -648,6 +649,7 @@ extern void dev_pm_put_subsys_data(struct device *dev); */ struct dev_pm_domain { struct dev_pm_ops ops; + int (*start)(struct device *dev); void (*detach)(struct device *dev, bool power_off); int (*activate)(struct device *dev); void (*sync)(struct device *dev); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index baf02ff91a31..5a31c711b896 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -366,6 +366,7 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev, struct device *dev_pm_domain_attach_by_name(struct device *dev, const char *name); void dev_pm_domain_detach(struct device *dev, bool power_off); +int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); #else static inline int dev_pm_domain_attach(struct device *dev, bool power_on) @@ -383,6 +384,10 @@ static inline struct device *dev_pm_domain_attach_by_name(struct device *dev, return NULL; } static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} +static inline int dev_pm_domain_start(struct device *dev) +{ + return 0; +} static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} #endif -- cgit v1.2.3 From bee3bbe616a2c8de641a64d874f9206835bd4401 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 12 Nov 2019 18:35:59 -0800 Subject: driver core: Clarify documentation for fwnode_operations.add_links() The wording was a bit ambiguous. So update it to make it clear. Signed-off-by: Saravana Kannan Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20191113023559.62295-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 766ff9bb5876..23df37f85398 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -94,15 +94,16 @@ struct fwnode_reference_args { * available suppliers. * * Return 0 if device links have been successfully created to all - * the suppliers this device needs to create device links to or if - * the supplier information is not known. + * the known suppliers of this device or if the supplier + * information is not known. * - * Return -ENODEV if and only if the suppliers needed for probing - * the device are not yet available to create device links to. + * Return -ENODEV if the suppliers needed for probing this device + * have not been registered yet (because device links can only be + * created to devices registered with the driver core). * - * Return -EAGAIN if there are suppliers that need to be linked to - * that are not yet available but none of those suppliers are - * necessary for probing this device. + * Return -EAGAIN if some of the suppliers of this device have not + * been registered yet, but none of those suppliers are necessary + * for probing the device. */ struct fwnode_operations { struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); -- cgit v1.2.3 From c059d579961d62c1a675cb89ef799902e6bce815 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Wed, 16 Oct 2019 15:45:37 +0100 Subject: parport: daisy: avoid hardcoded name The daisy device name is hardcoded, define it in the header file and use it in the code. Signed-off-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20191016144540.18810-1-sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/parport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index 397607a0c0eb..13932ce8b37b 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -460,6 +460,7 @@ extern size_t parport_ieee1284_epp_read_addr (struct parport *, void *, size_t, int); /* IEEE1284.3 functions */ +#define daisy_dev_name "Device ID probe" extern int parport_daisy_init (struct parport *port); extern void parport_daisy_fini (struct parport *port); extern struct pardevice *parport_open (int devnum, const char *name); -- cgit v1.2.3 From b056ca1c2f01b2d261c2dd6d167c17ac27977034 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 4 Nov 2019 18:09:39 +0200 Subject: gpiolib: Introduce ->add_pin_ranges() callback When IRQ chip is being added by GPIO library, the ACPI based platform expects GPIO <-> pin mapping ranges to be initialized in order to correctly initialize ACPI event mechanism on affected platforms. Unfortunately this step is missed. Introduce ->add_pin_ranges() callback to fill the above mentioned gap. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Reviewed-by: Mika Westerberg Reviewed-by: Hans de Goede Tested-by: Hans de Goede --- include/linux/gpio/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index cc9ade4552d9..e2480ef94c55 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -289,6 +289,9 @@ struct gpio_irq_chip { * state (such as pullup/pulldown configuration). * @init_valid_mask: optional routine to initialize @valid_mask, to be used if * not all GPIOs are valid. + * @add_pin_ranges: optional routine to initialize pin ranges, to be used when + * requires special mapping of the pins that provides GPIO functionality. + * It is called after adding GPIO chip and before adding IRQ chip. * @base: identifies the first GPIO number handled by this chip; * or, if negative during registration, requests dynamic ID allocation. * DEPRECATION: providing anything non-negative and nailing the base @@ -379,6 +382,8 @@ struct gpio_chip { unsigned long *valid_mask, unsigned int ngpios); + int (*add_pin_ranges)(struct gpio_chip *chip); + int base; u16 ngpio; const char *const *names; -- cgit v1.2.3 From 763e34e74bb7d5c316015e2e39fcc8520bfd071c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 8 Nov 2019 13:07:06 -0500 Subject: ftrace: Add register_ftrace_direct() Add the start of the functionality to allow other trampolines to use the ftrace mcount/fentry/nop location. This adds two new functions: register_ftrace_direct() and unregister_ftrace_direct() Both take two parameters: the first is the instruction address of where the mcount/fentry/nop exists, and the second is the trampoline to have that location called. This will handle cases where ftrace is already used on that same location, and will make it still work, where the registered direct called trampoline will get called after all the registered ftrace callers are handled. Currently, it will not allow for IP_MODIFY functions to be called at the same locations, which include some kprobes and live kernel patching. At this point, no architecture supports this. This is only the start of implementing the framework. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8385cafe4f9f..efe3e521aff4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -144,6 +144,8 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * TRACE_ARRAY - The ops->private points to a trace_array descriptor. * PERMANENT - Set when the ops is permanent and should not be affected by * ftrace_enabled. + * DIRECT - Used by the direct ftrace_ops helper for direct functions + * (internal ftrace only, should not be used by others) */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -163,6 +165,7 @@ enum { FTRACE_OPS_FL_RCU = 1 << 14, FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, FTRACE_OPS_FL_PERMANENT = 1 << 16, + FTRACE_OPS_FL_DIRECT = 1 << 17, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -242,6 +245,32 @@ static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +int register_ftrace_direct(unsigned long ip, unsigned long addr); +int unregister_ftrace_direct(unsigned long ip, unsigned long addr); +#else +static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) +{ + return -ENODEV; +} +static inline int unregister_ftrace_direct(unsigned long ip, unsigned long addr) +{ + return -ENODEV; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * This must be implemented by the architecture. + * It is the way the ftrace direct_ops helper, when called + * via ftrace (because there's other callbacks besides the + * direct call), can inform the architecture's trampoline that this + * routine has a direct caller, and what the caller is. + */ +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, + unsigned long addr) { } +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #ifdef CONFIG_STACK_TRACER extern int stack_tracer_enabled; @@ -333,6 +362,7 @@ bool is_ftrace_trampoline(unsigned long addr); * REGS_EN - the function is set up to save regs. * IPMODIFY - the record allows for the IP address to be changed. * DISABLED - the record is not ready to be touched yet + * DIRECT - there is a direct function to call * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flag REGS is set. When the function has been @@ -348,10 +378,12 @@ enum { FTRACE_FL_TRAMP_EN = (1UL << 27), FTRACE_FL_IPMODIFY = (1UL << 26), FTRACE_FL_DISABLED = (1UL << 25), + FTRACE_FL_DIRECT = (1UL << 24), + FTRACE_FL_DIRECT_EN = (1UL << 23), }; -#define FTRACE_REF_MAX_SHIFT 25 -#define FTRACE_FL_BITS 7 +#define FTRACE_REF_MAX_SHIFT 23 +#define FTRACE_FL_BITS 9 #define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) -- cgit v1.2.3 From 013bf0da0474816f57739daa006c8564ad7396a3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 8 Nov 2019 13:11:27 -0500 Subject: ftrace: Add ftrace_find_direct_func() As function_graph tracer modifies the return address to insert a trampoline to trace the return of a function, it must be aware of a direct caller, as when it gets called, the function's return address may not be at on the stack where it expects. It may have to see if that return address points to the a direct caller and adjust if it is. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index efe3e521aff4..8b37b8105398 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -51,6 +51,7 @@ static inline void early_trace_init(void) { } struct module; struct ftrace_hash; +struct ftrace_direct_func; #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ defined(CONFIG_DYNAMIC_FTRACE) @@ -248,6 +249,7 @@ static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS int register_ftrace_direct(unsigned long ip, unsigned long addr); int unregister_ftrace_direct(unsigned long ip, unsigned long addr); +struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr); #else static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) { @@ -257,6 +259,10 @@ static inline int unregister_ftrace_direct(unsigned long ip, unsigned long addr) { return -ENODEV; } +static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) +{ + return NULL; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -- cgit v1.2.3 From 562955fe6a558b9ef98ad87c470314946338cb2f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 8 Nov 2019 13:11:39 -0500 Subject: ftrace/x86: Add register_ftrace_direct() for custom trampolines Enable x86 to allow for register_ftrace_direct(), where a custom trampoline may be called directly from an ftrace mcount/fentry location. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8b37b8105398..2bc7bd6b8387 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -272,6 +272,12 @@ static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long a * via ftrace (because there's other callbacks besides the * direct call), can inform the architecture's trampoline that this * routine has a direct caller, and what the caller is. + * + * For example, in x86, it returns the direct caller + * callback function via the regs->orig_ax parameter. + * Then in the ftrace trampoline, if this is set, it makes + * the return from the trampoline jump to the direct caller + * instead of going back to the function it just traced. */ static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) { } -- cgit v1.2.3 From a3ad1a7e39689005cb04a4f2adb82f9d55b4724f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 8 Nov 2019 13:12:57 -0500 Subject: ftrace/x86: Add a counter to test function_graph with direct As testing for direct calls from the function graph tracer adds a little overhead (which is a lot when tracing every function), add a counter that can be used to test if function_graph tracer needs to test for a direct caller or not. It would have been nicer if we could use a static branch, but the static branch logic fails when used within the function graph tracer trampoline. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2bc7bd6b8387..55647e185141 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -247,10 +247,12 @@ static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); int unregister_ftrace_direct(unsigned long ip, unsigned long addr); struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr); #else +# define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) { return -ENODEV; -- cgit v1.2.3 From 2d6425af61166e026e7476db64f70f1266127b1d Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Wed, 14 Aug 2019 10:55:23 -0700 Subject: tracing: Declare newly exported APIs in include/linux/trace.h Declare the newly introduced and exported APIs in the header file - include/linux/trace.h. Moving previous declarations from kernel/trace/trace.h to include/linux/trace.h. Link: http://lkml.kernel.org/r/1565805327-579-2-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index b95ffb2188ab..24fcf07812ae 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -24,6 +24,13 @@ struct trace_export { int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); +struct trace_array; + +void trace_printk_init_buffers(void); +int trace_array_printk(struct trace_array *tr, unsigned long ip, + const char *fmt, ...); +struct trace_array *trace_array_create(const char *name); +int trace_array_destroy(struct trace_array *tr); #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_H */ -- cgit v1.2.3 From cb711b91a3c685192f2cabd3735ca3de04694ed8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Nov 2019 01:27:21 +0800 Subject: blk-mq: Delete blk_mq_has_free_tags() and blk_mq_can_queue() These functions are not referenced, so delete them. Signed-off-by: John Garry Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index dc03e059fdff..11cfd6470b1a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -424,7 +424,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); -bool blk_mq_can_queue(struct blk_mq_hw_ctx *); bool blk_mq_queue_inflight(struct request_queue *q); -- cgit v1.2.3 From 708edafa883186f55b24fa0c380242b5282f9105 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Nov 2019 01:27:22 +0800 Subject: sbitmap: Delete sbitmap_any_bit_clear() Since the only caller of this function has been deleted, delete this one also. Signed-off-by: John Garry Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index a986ac12a848..e40d019c3d9d 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -216,15 +216,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, */ bool sbitmap_any_bit_set(const struct sbitmap *sb); -/** - * sbitmap_any_bit_clear() - Check for an unset bit in a &struct - * sbitmap. - * @sb: Bitmap to check. - * - * Return: true if any bit in the bitmap is clear, false otherwise. - */ -bool sbitmap_any_bit_clear(const struct sbitmap *sb); - #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) -- cgit v1.2.3 From 3ad2522c64cff1f5aebb987b00683268f0cc7c29 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 29 Oct 2019 13:41:38 -0700 Subject: statx: define STATX_ATTR_VERITY Add a statx attribute bit STATX_ATTR_VERITY which will be set if the file has fs-verity enabled. This is the statx() equivalent of FS_VERITY_FL which is returned by FS_IOC_GETFLAGS. This is useful because it allows applications to check whether a file is a verity file without opening it. Opening a verity file can be expensive because the fsverity_info is set up on open, which involves parsing metadata and optionally verifying a cryptographic signature. This is analogous to how various other bits are exposed through both FS_IOC_GETFLAGS and statx(), e.g. the encrypt bit. Reviewed-by: Andreas Dilger Acked-by: Darrick J. Wong Signed-off-by: Eric Biggers --- include/linux/stat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stat.h b/include/linux/stat.h index 765573dc17d6..528c4baad091 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -33,7 +33,8 @@ struct kstat { STATX_ATTR_IMMUTABLE | \ STATX_ATTR_APPEND | \ STATX_ATTR_NODUMP | \ - STATX_ATTR_ENCRYPTED \ + STATX_ATTR_ENCRYPTED | \ + STATX_ATTR_VERITY \ )/* Attrs corresponding to FS_*_FL flags */ u64 ino; dev_t dev; -- cgit v1.2.3 From 975b992fdd4b38028d7c1dcf38286d6e7991c1b2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:29 +0100 Subject: net/mlx5: Add new chain for netfilter flow table offload Netfilter tables (nftables) implements a software datapath that comes after tc ingress datapath. The datapath supports offloading such rules via the flow table offload API. This API is currently only used by NFT and it doesn't provide the global priority in regards to tc offload, so we assume offloading such rules must come after tc. It does provide a flow table priority parameter, so we need to provide some supported priority range. For that, split fastpath prio to two, flow table offload and tc offload, with one dedicated priority chain for flow table offload. Next patch will re-use the multi chain API to access this chain by allowing access to this chain by the fdb_sub_namespace. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 724d276ea133..4e5b84e66822 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -80,7 +80,8 @@ enum mlx5_flow_namespace_type { enum { FDB_BYPASS_PATH, - FDB_FAST_PATH, + FDB_TC_OFFLOAD, + FDB_FT_OFFLOAD, FDB_SLOW_PATH, }; -- cgit v1.2.3 From bf49d9dd6fef688733e2ddbd55f7bcb57df194e4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 18 Oct 2019 13:50:53 +0900 Subject: export,module: add SPDX GPL-2.0 license identifier to headers with no license Commit b24413180f56 ("License cleanup: add SPDX GPL-2.0 license identifier to files with no license") took care of a lot of files without any license information. These headers were not processed by the tool perhaps because they contain "GPL" in the code. I do not see any license boilerplate in them, so they fall back to GPL version 2 only, which is the project default. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20191018045053.8424-1-yamada.masahiro@socionext.com Signed-off-by: Greg Kroah-Hartman --- include/linux/export.h | 1 + include/linux/license.h | 1 + include/linux/module.h | 7 +++++-- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/export.h b/include/linux/export.h index 941d075f03d6..aee5c86ae350 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_EXPORT_H #define _LINUX_EXPORT_H diff --git a/include/linux/license.h b/include/linux/license.h index decdbf43cb5c..7cce390f120b 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __LICENSE_H #define __LICENSE_H diff --git a/include/linux/module.h b/include/linux/module.h index 6d20895e7739..bd165ba68617 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -1,11 +1,14 @@ -#ifndef _LINUX_MODULE_H -#define _LINUX_MODULE_H +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Dynamic loading of modules into the kernel. * * Rewritten by Richard Henderson Dec 1996 * Rewritten again by Rusty Russell, 2002 */ + +#ifndef _LINUX_MODULE_H +#define _LINUX_MODULE_H + #include #include #include -- cgit v1.2.3 From 46549769d0512e44c5a08ae2dd05cebe55b3db16 Mon Sep 17 00:00:00 2001 From: Dhanuka Warusadura Date: Mon, 28 Oct 2019 16:37:44 +0530 Subject: w1: Fix documentation warning. This patch fixes - warning: Function parameter or member 'of_match_table' not described in 'w1_family' Signed-off-by: Dhanuka Warusadura Link: https://lore.kernel.org/r/20191028110744.6523-1-csx@disroot.org Signed-off-by: Greg Kroah-Hartman --- include/linux/w1.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/w1.h b/include/linux/w1.h index 7da0c7588e04..cebf3464bc03 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -262,6 +262,7 @@ struct w1_family_ops { * @family_entry: family linked list * @fid: 8 bit family identifier * @fops: operations for this family + * @of_match_table: open firmware match table * @refcnt: reference counter */ struct w1_family { -- cgit v1.2.3 From edb44e8461cfb9da8fd7a60d1ded2912a01100be Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 09:43:23 +0100 Subject: cpu-topology: declare parse_acpi_topology in The parse_acpi_topology() is not declared anywhere which causes the following sparse warning: drivers/base/arch_topology.c:522:19: warning: symbol 'parse_acpi_topology' was not declared. Should it be static? Signed-off-by: Ben Dooks (Codethink) Acked-by: Sudeep Holla Link: https://lore.kernel.org/r/20191022084323.13594-1-ben.dooks@codethink.co.uk Signed-off-by: Greg Kroah-Hartman --- include/linux/arch_topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 42f2b5126094..3015ecbb90b1 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -57,6 +57,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu); void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); +int parse_acpi_topology(void); #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ -- cgit v1.2.3 From c0e5f4e73a7148e18b763067d181661987cb4c09 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Mon, 21 Oct 2019 16:05:05 +0800 Subject: misc: rtsx: Add support for RTS5261 Add support for new chip rts5261. In order to support rts5261, the definitions of some internal registers and workflow have to be modified and are different from its predecessors. So we need this patch to ensure RTS5261 can work. Signed-off-by: Rui Feng Link: https://lore.kernel.org/r/1571645105-5028-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/rtsx_pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index f87da30a58b1..65b8142a7fed 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1262,6 +1262,7 @@ struct rtsx_pcr { #define PID_5250 0x5250 #define PID_525A 0x525A #define PID_5260 0x5260 +#define PID_5261 0x5261 #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) #define PCI_VID(pcr) ((pcr)->pci->vendor) -- cgit v1.2.3 From e9cb0497b1c801a66f9abc907b2c55241099e6ac Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Mon, 4 Nov 2019 10:24:36 -0600 Subject: firmware: Fix incompatible function behavior for RSU driver The older versions of remote system update (RSU) firmware don't support retry and notify features then the kernel module dies when it queries the RSU retry counter or performs notify operation. Update the Intel service layer and RSU drivers to be compatible with all versions of RSU firmware. Reported-by: Radu Barcau Reported-by: kbuild test robot Signed-off-by: Richard Gong Link: https://lore.kernel.org/r/1572884676-1385-1-git-send-email-richard.gong@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/firmware/intel/stratix10-svc-client.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h index b6c4302a39e0..59bc6e2af693 100644 --- a/include/linux/firmware/intel/stratix10-svc-client.h +++ b/include/linux/firmware/intel/stratix10-svc-client.h @@ -41,6 +41,12 @@ * * SVC_STATUS_RSU_OK: * Secure firmware accepts the request of remote status update (RSU). + * + * SVC_STATUS_RSU_ERROR: + * Error encountered during remote system update. + * + * SVC_STATUS_RSU_NO_SUPPORT: + * Secure firmware doesn't support RSU retry or notify feature. */ #define SVC_STATUS_RECONFIG_REQUEST_OK 0 #define SVC_STATUS_RECONFIG_BUFFER_SUBMITTED 1 @@ -50,6 +56,8 @@ #define SVC_STATUS_RECONFIG_ERROR 5 #define SVC_STATUS_RSU_OK 6 #define SVC_STATUS_RSU_ERROR 7 +#define SVC_STATUS_RSU_NO_SUPPORT 8 + /** * Flag bit for COMMAND_RECONFIG * -- cgit v1.2.3 From a7e335deed174a37fc6f84f69caaeff8a08f8ff8 Mon Sep 17 00:00:00 2001 From: Eric Long Date: Wed, 23 Oct 2019 14:31:32 +0800 Subject: dmaengine: sprd: Add wrap address support for link-list mode The Spreadtrum Audio compress offload mode will use 2-stage DMA transfer to save power. That means we can request 2 dma channels, one for source channel, and another one for destination channel. Once the source channel's transaction is done, it will trigger the destination channel's transaction automatically by hardware signal. In this case, the source channel will transfer data from IRAM buffer to the DSP fifo to decoding/encoding, once IRAM buffer is empty by transferring done, the destination channel will start to transfer data from DDR buffer to IRAM buffer. Since the destination channel will use link-list mode to fill the IRAM data, and IRAM buffer is allocated by 32K, and DDR buffer is larger to 2M, that means we need lots of link-list nodes to do a cyclic transfer, instead wasting lots of link-list memory, we can use wrap address support to reduce link-list node number, which means when the transfer address reaches the wrap address, the transfer address will jump to the wrap_to address specified by wrap_to register, and only 2 link-list nodes can do a cyclic transfer to transfer data from DDR to IRAM. Thus this patch adds wrap address to support this case. [Baolin Wang changes the commit message] Signed-off-by: Eric Long Signed-off-by: Baolin Wang Link: https://lore.kernel.org/r/85a5484bc1f3dd53ce6f92700ad8b35f30a0b096.1571812029.git.baolin.wang@linaro.org Signed-off-by: Vinod Koul --- include/linux/dma/sprd-dma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/sprd-dma.h b/include/linux/dma/sprd-dma.h index ab82df64682a..d09c6f6f6da5 100644 --- a/include/linux/dma/sprd-dma.h +++ b/include/linux/dma/sprd-dma.h @@ -118,6 +118,9 @@ enum sprd_dma_int_type { * struct sprd_dma_linklist - DMA link-list address structure * @virt_addr: link-list virtual address to configure link-list node * @phy_addr: link-list physical address to link DMA transfer + * @wrap_addr: the wrap address for link-list mode, which means once the + * transfer address reaches the wrap address, the next transfer address + * will jump to the address specified by wrap_to register. * * The Spreadtrum DMA controller supports the link-list mode, that means slaves * can supply several groups configurations (each configuration represents one @@ -181,6 +184,7 @@ enum sprd_dma_int_type { struct sprd_dma_linklist { unsigned long virt_addr; phys_addr_t phy_addr; + phys_addr_t wrap_addr; }; #endif -- cgit v1.2.3 From 6917d0689993f46d97d40dd66c601d0fd5b1dbdd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2019 15:34:33 +0100 Subject: block: merge invalidate_partitions into rescan_partitions A lot of the logic in invalidate_partitions and rescan_partitions is shared. Merge the two functions to simplify things. There is a small behavior change in that we now send the kevent change notice also if we were not invalidating but no partitions were found, which seems like the right thing to do. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 8b5330dd5ac0..fd7774e64f0b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -622,8 +622,8 @@ extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int disk_expand_part_tbl(struct gendisk *disk, int target); -extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); -extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev); +int rescan_partitions(struct gendisk *disk, struct block_device *bdev, + bool invalidate); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, -- cgit v1.2.3 From a1548b674403c0de70cc29a1575689917ba60157 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2019 15:34:34 +0100 Subject: block: move rescan_partitions to fs/block_dev.c Large parts of rescan_partitions aren't about partitions, and moving it to block_dev.c will allow for some further cleanups by merging it into its only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- include/linux/genhd.h | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..d233dd661df7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2703,8 +2703,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -extern void check_disk_size_change(struct gendisk *disk, - struct block_device *bdev, bool verbose); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *, bool); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index fd7774e64f0b..f5cffbf63abf 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -621,9 +621,9 @@ extern void blk_invalidate_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); +int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); +int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); extern int disk_expand_part_tbl(struct gendisk *disk, int target); -int rescan_partitions(struct gendisk *disk, struct block_device *bdev, - bool invalidate); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, -- cgit v1.2.3 From 142fe8f4bb169e8632024d51c64653a8bf140561 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2019 15:34:35 +0100 Subject: block: fix bdev_disk_changed for non-partitioned devices We still have to set the capacity to 0 if invalidating or call revalidate_disk if not even if the disk has no partitions. Fix that by merging rescan_partitions into bdev_disk_changed and just stubbing out blk_add_partitions and blk_drop_partitions for non-partitioned devices. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f5cffbf63abf..8bb63027e4d6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -621,6 +621,7 @@ extern void blk_invalidate_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); +int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); extern int disk_expand_part_tbl(struct gendisk *disk, int target); -- cgit v1.2.3 From f0b870df80bc70dad432fd0c142bb709a49964f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2019 15:34:36 +0100 Subject: block: remove (__)blkdev_reread_part as an exported API In general drivers should never mess with partition tables directly. Unfortunately s390 and loop do for somewhat historic reasons, but they can use bdev_disk_changed directly instead when we export it as they satisfy the sanity checks we have in __blkdev_reread_part. Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland [dasd] Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d233dd661df7..ae6c5c37f3ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2632,8 +2632,6 @@ extern void bd_finish_claiming(struct block_device *bdev, extern void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); -extern int __blkdev_reread_part(struct block_device *bdev); -extern int blkdev_reread_part(struct block_device *bdev); #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -- cgit v1.2.3 From 2ac55d5e5ec9ad0a07e194f0eaca865fe5aa3c40 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 17 Oct 2019 15:25:36 +0200 Subject: mmc: core: Re-work HW reset for SDIO cards It have turned out that it's not a good idea to unconditionally do a power cycle and then to re-initialize the SDIO card, as currently done through mmc_hw_reset() -> mmc_sdio_hw_reset(). This because there may be multiple SDIO func drivers probed, who also shares the same SDIO card. To address these scenarios, one may be tempted to use a notification mechanism, as to allow the core to inform each of the probed func drivers, about an ongoing HW reset. However, supporting such an operation from the func driver point of view, may not be entirely trivial. Therefore, let's use a more simplistic approach to solve the problem, by instead forcing the card to be removed and re-detected, via scheduling a rescan-work. In this way, we can rely on existing infrastructure, as the func driver's ->remove() and ->probe() callbacks, becomes invoked to deal with the cleanup and the re-initialization. This solution may be considered as rather heavy, especially if a func driver doesn't share its card with other func drivers. To address this, let's keep the current immediate HW reset option as well, but run it only when there is one func driver probed for the card. Finally, to allow the caller of mmc_hw_reset(), to understand if the reset is being asynchronously managed from a scheduled work, it returns 1 (propagated from mmc_sdio_hw_reset()). If the HW reset is executed successfully and synchronously it returns 0, which maintains the existing behaviour. Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 9b6336ad3266..e459b38ef33c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -291,6 +291,7 @@ struct mmc_card { struct sd_switch_caps sw_caps; /* switch (CMD6) caps */ unsigned int sdio_funcs; /* number of SDIO functions */ + atomic_t sdio_funcs_probed; /* number of probed SDIO funcs */ struct sdio_cccr cccr; /* common card info */ struct sdio_cis cis; /* common tuple info */ struct sdio_func *sdio_func[SDIO_MAX_FUNCS]; /* SDIO functions (devices) */ -- cgit v1.2.3 From 7283fff8b524b2f27438429aca458b232f5c5c8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 08:32:13 +0100 Subject: dma-mapping: remove the DMA_ATTR_WRITE_BARRIER flag This flag is not implemented by any backend and only set by the ib_umem module in a single instance. Link: https://lore.kernel.org/r/20191113073214.9514-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/dma-mapping.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4a1c4fca475a..8023071d6903 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -15,11 +15,8 @@ /** * List of possible attributes associated with a DMA mapping. The semantics * of each attribute should be defined in Documentation/DMA-attributes.txt. - * - * DMA_ATTR_WRITE_BARRIER: DMA to a memory region with this attribute - * forces all pending DMA writes to complete. */ -#define DMA_ATTR_WRITE_BARRIER (1UL << 0) + /* * DMA_ATTR_WEAK_ORDERING: Specifies that reads and writes to the mapping * may be weakly ordered, that is that reads and writes may pass each other. -- cgit v1.2.3 From 020003f763e24e4ed0bb3d8909f3940891536d5d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 14 Nov 2019 08:25:28 -0800 Subject: bus: ti-sysc: Add module enable quirk for audio AESS We must set the autogating bit on enable for AESS (Audio Engine SubSystem) when probed with ti-sysc interconnect target module driver. Otherwise it won't idle properly. Cc: Peter Ujfalusi Tested-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index b5b7a3423ca8..0b9380475144 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -49,6 +49,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_MODULE_QUIRK_AESS BIT(19) #define SYSC_MODULE_QUIRK_SGX BIT(18) #define SYSC_MODULE_QUIRK_HDQ1W BIT(17) #define SYSC_MODULE_QUIRK_I2C BIT(16) -- cgit v1.2.3 From 52ae533b8a18e7ca868e7ac5953ad7258210f320 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Oct 2019 16:56:54 +0300 Subject: lib/sort: Move swap, cmp and cmp_r function types for wider use The function types for swap, cmp and cmp_r functions are already being in use by modules. Move them to types.h that everybody in kernel will be able to use generic types instead of custom ones. This adds more sense to the comment in bsearch() later on. Link: http://lkml.kernel.org/r/20191007135656.37734-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (VMware) --- include/linux/sort.h | 8 ++++---- include/linux/types.h | 5 +++++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sort.h b/include/linux/sort.h index 61b96d0ebc44..b5898725fe9d 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -5,12 +5,12 @@ #include void sort_r(void *base, size_t num, size_t size, - int (*cmp)(const void *, const void *, const void *), - void (*swap)(void *, void *, int), + cmp_r_func_t cmp_func, + swap_func_t swap_func, const void *priv); void sort(void *base, size_t num, size_t size, - int (*cmp)(const void *, const void *), - void (*swap)(void *, void *, int)); + cmp_func_t cmp_func, + swap_func_t swap_func); #endif diff --git a/include/linux/types.h b/include/linux/types.h index 05030f608be3..85c0e7b18153 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -225,5 +225,10 @@ struct callback_head { typedef void (*rcu_callback_t)(struct rcu_head *head); typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); +typedef void (*swap_func_t)(void *a, void *b, int size); + +typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv); +typedef int (*cmp_func_t)(const void *a, const void *b); + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ -- cgit v1.2.3 From e8877ec5dbba6f39d25ca3a81716c23b1760f2ee Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Oct 2019 16:56:55 +0300 Subject: lib/bsearch: Use generic type for comparator function Comparator function type, cmp_func_t, is defined in the types.h, use it in bsearch() and, thus, add more sense to the corresponding comment in the code. Link: http://lkml.kernel.org/r/20191007135656.37734-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (VMware) --- include/linux/bsearch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bsearch.h b/include/linux/bsearch.h index 62b1eb348858..8ed53d7524ea 100644 --- a/include/linux/bsearch.h +++ b/include/linux/bsearch.h @@ -5,6 +5,6 @@ #include void *bsearch(const void *key, const void *base, size_t num, size_t size, - int (*cmp)(const void *key, const void *elt)); + cmp_func_t cmp); #endif /* _LINUX_BSEARCH_H */ -- cgit v1.2.3 From 353cade3149c27b53260932ee3ff1ebde405976d Mon Sep 17 00:00:00 2001 From: Piotr Maziarz Date: Thu, 7 Nov 2019 13:45:37 +0100 Subject: seq_buf: Add printing formatted hex dumps Provided function is an analogue of print_hex_dump(). Implementing this function in seq_buf allows using for multiple purposes (e.g. for tracing) and therefore prevents from code duplication in every layer that uses seq_buf. print_hex_dump() is an essential part of logging data to dmesg. Adding similar capability for other purposes is beneficial to all users. Example usage: seq_buf_hex_dump(seq, "", DUMP_PREFIX_OFFSET, 16, 4, buf, ARRAY_SIZE(buf), true); Example output: 00000000: 00000000 ffffff10 ffffff32 ffff3210 ........2....2.. 00000010: ffff3210 83d00437 c0700000 00000000 .2..7.....p..... 00000020: 02010004 0000000f 0000000f 00004002 .............@.. 00000030: 00000fff 00000000 ........ Link: http://lkml.kernel.org/r/1573130738-29390-1-git-send-email-piotrx.maziarz@linux.intel.com Signed-off-by: Piotr Maziarz Signed-off-by: Cezary Rojewski Signed-off-by: Steven Rostedt (VMware) --- include/linux/seq_buf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index aa5deb041c25..fb0205d87d3c 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -125,6 +125,9 @@ extern int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len); extern int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len); extern int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc); +extern int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); #ifdef CONFIG_BINARY_PRINTF extern int -- cgit v1.2.3 From ef56e047b2bd4dabb801fd073dfcab5f40de5f78 Mon Sep 17 00:00:00 2001 From: Piotr Maziarz Date: Thu, 7 Nov 2019 13:45:38 +0100 Subject: tracing: Use seq_buf_hex_dump() to dump buffers Without this, buffers can be printed with __print_array macro that has no formatting options and can be hard to read. The other way is to mimic formatting capability with multiple calls of trace event with one call per row which gives performance impact and different timestamp in each row. Link: http://lkml.kernel.org/r/1573130738-29390-2-git-send-email-piotrx.maziarz@linux.intel.com Signed-off-by: Piotr Maziarz Signed-off-by: Cezary Rojewski Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 5 +++++ include/linux/trace_seq.h | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 30a8cdcfd4a4..60a41b7069dd 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -45,6 +45,11 @@ const char *trace_print_array_seq(struct trace_seq *p, const void *buf, int count, size_t el_size); +const char * +trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); + struct trace_iterator; struct trace_event; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 6609b39a7232..6c30508fca19 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -92,6 +92,10 @@ extern int trace_seq_path(struct trace_seq *s, const struct path *path); extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, int nmaskbits); +extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); + #else /* CONFIG_TRACING */ static inline void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { -- cgit v1.2.3 From db205c766862edae48d64e69e2f2502e2a3e9135 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Nov 2019 10:57:37 +0100 Subject: vsock: remove vm_sockets_get_local_cid() vm_sockets_get_local_cid() is only used in virtio_transport_common.c. We can replace it calling the virtio_transport_get_ops() and using the get_local_cid() callback registered by the transport. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/vm_sockets.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h index 33f1a2ecd905..7dd899ccb920 100644 --- a/include/linux/vm_sockets.h +++ b/include/linux/vm_sockets.h @@ -10,6 +10,4 @@ #include -int vm_sockets_get_local_cid(void); - #endif /* _VM_SOCKETS_H */ -- cgit v1.2.3 From 3603a2e991a82e5094c3107a792859b08342aed3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Nov 2019 10:57:38 +0100 Subject: vsock: remove include/linux/vm_sockets.h file This header file now only includes the "uapi/linux/vm_sockets.h". We can include directly it when needed. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/vm_sockets.h | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 include/linux/vm_sockets.h (limited to 'include/linux') diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h deleted file mode 100644 index 7dd899ccb920..000000000000 --- a/include/linux/vm_sockets.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * VMware vSockets Driver - * - * Copyright (C) 2007-2013 VMware, Inc. All rights reserved. - */ - -#ifndef _VM_SOCKETS_H -#define _VM_SOCKETS_H - -#include - -#endif /* _VM_SOCKETS_H */ -- cgit v1.2.3 From 4c7246dc45e2706770d5233f7ce1597a07e069ba Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Nov 2019 10:57:40 +0100 Subject: vsock/virtio: add transport parameter to the virtio_transport_reset_no_sock() We are going to add 'struct vsock_sock *' parameter to virtio_transport_get_ops(). In some cases, like in the virtio_transport_reset_no_sock(), we don't have any socket assigned to the packet received, so we can't use the virtio_transport_get_ops(). In order to allow virtio_transport_reset_no_sock() to use the '.send_pkt' callback from the 'vhost_transport' or 'virtio_transport', we add the 'struct virtio_transport *' to it and to its caller: virtio_transport_recv_pkt(). We moved the 'vhost_transport' and 'virtio_transport' definition, to pass their address to the virtio_transport_recv_pkt(). Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 07875ccc7bb5..b139f76060a6 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -150,7 +150,8 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, void virtio_transport_destruct(struct vsock_sock *vsk); -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_recv_pkt(struct virtio_transport *t, + struct virtio_vsock_pkt *pkt); void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); -- cgit v1.2.3 From b9f2b0ffde0c9b666b2b1672eb468b8f805a9b97 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Nov 2019 10:57:42 +0100 Subject: vsock: handle buffer_size sockopts in the core virtio_transport and vmci_transport handle the buffer_size sockopts in a very similar way. In order to support multiple transports, this patch moves this handling in the core to allow the user to change the options also if the socket is not yet assigned to any transport. This patch also adds the '.notify_buffer_size' callback in the 'struct virtio_transport' in order to inform the transport, when the buffer_size is changed by the user. It is also useful to limit the 'buffer_size' requested (e.g. virtio transports). Acked-by: Dexuan Cui Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index b139f76060a6..71c81e0dc8f2 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -7,9 +7,6 @@ #include #include -#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) @@ -25,11 +22,6 @@ enum { struct virtio_vsock_sock { struct vsock_sock *vsk; - /* Protected by lock_sock(sk_vsock(trans->vsk)) */ - u32 buf_size; - u32 buf_size_min; - u32 buf_size_max; - spinlock_t tx_lock; spinlock_t rx_lock; @@ -92,12 +84,6 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); int virtio_transport_do_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk); -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); int virtio_transport_notify_poll_in(struct vsock_sock *vsk, size_t target, @@ -124,6 +110,7 @@ int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, struct vsock_transport_send_notify_data *data); int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, struct vsock_transport_send_notify_data *data); +void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val); u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -- cgit v1.2.3 From b1bba80a4376aef34de2b57bfb8834bd095703ed Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Nov 2019 10:57:47 +0100 Subject: vsock/vmci: register vmci_transport only when VMCI guest/host are active To allow other transports to be loaded with vmci_transport, we register the vmci_transport as G2H or H2G only when a VMCI guest or host is active. To do that, this patch adds a callback registered in the vmci driver that will be called when the host or guest becomes active. This callback will register the vmci_transport in the VSOCK core. Cc: Jorgen Hansen Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller --- include/linux/vmw_vmci_api.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index acd9fafe4fc6..f28907345c80 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -19,6 +19,7 @@ struct msghdr; typedef void (vmci_device_shutdown_fn) (void *device_registration, void *user_data); +typedef void (*vmci_vsock_cb) (bool is_host); int vmci_datagram_create_handle(u32 resource_id, u32 flags, vmci_datagram_recv_cb recv_cb, @@ -37,6 +38,7 @@ int vmci_doorbell_destroy(struct vmci_handle handle); int vmci_doorbell_notify(struct vmci_handle handle, u32 priv_flags); u32 vmci_get_context_id(void); bool vmci_is_context_owner(u32 context_id, kuid_t uid); +int vmci_register_vsock_callback(vmci_vsock_cb callback); int vmci_event_subscribe(u32 event, vmci_event_cb callback, void *callback_data, -- cgit v1.2.3 From 2f4741497c9d5151c6bde0edb6faf34fc3859b80 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 14 Nov 2019 19:06:47 -0800 Subject: libnvdimm: Trivial comment fix Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20190918211933.13213-1-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/linux/nd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index f778f962d1b6..55c735997805 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -147,7 +147,7 @@ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, /** * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace - * @ndns: device to read + * @ndns: device to write * @offset: namespace-relative starting offset * @buf: buffer to drain * @size: transfer length -- cgit v1.2.3 From 0567d6809182df53da03636fad36c507c5cf07a5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 14 Nov 2019 14:39:35 -0500 Subject: ftrace: Add modify_ftrace_direct() Add a new function modify_ftrace_direct() that will allow a user to update an existing direct caller to a new trampoline, without missing hits due to unregistering one and then adding another. Link: https://lore.kernel.org/r/20191109022907.6zzo6orhxpt5n2sv@ast-mbp.dhcp.thefacebook.com Suggested-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 55647e185141..73eb2e93593f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -250,6 +250,7 @@ static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); int unregister_ftrace_direct(unsigned long ip, unsigned long addr); +int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr); struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr); #else # define ftrace_direct_func_count 0 @@ -261,6 +262,11 @@ static inline int unregister_ftrace_direct(unsigned long ip, unsigned long addr) { return -ENODEV; } +static inline int modify_ftrace_direct(unsigned long ip, + unsigned long old_addr, unsigned long new_addr) +{ + return -ENODEV; +} static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) { return NULL; -- cgit v1.2.3 From 4d219f4cf0322ab40c1b21a836a6d4b61e01216f Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:41 +0100 Subject: mmc: host: omap-hsmmc: remove init_card pdata callback from pdata Now as we have removed the last user (pandora_wl1251_init_card) of this callback, we can remove it from the hsmmc code. Suggested-by: Ulf Hansson Signed-off-by: H. Nikolaus Schaller Signed-off-by: Ulf Hansson --- include/linux/platform_data/hsmmc-omap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h index e79d238ff18f..7124a5f4bf06 100644 --- a/include/linux/platform_data/hsmmc-omap.h +++ b/include/linux/platform_data/hsmmc-omap.h @@ -67,9 +67,6 @@ struct omap_hsmmc_platform_data { /* string specifying a particular variant of hardware */ char *version; - /* if we have special card, init it using this callback */ - void (*init_card)(struct mmc_card *card); - const char *name; u32 ocr_mask; }; -- cgit v1.2.3 From e5db673e7fe2f971ec82039a28dc0811c2100e87 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 7 Nov 2019 11:30:42 +0100 Subject: mmc: sdio: fix wl1251 vendor id v4.11-rc1 did introduce a patch series that rearranged the sdio quirks into a header file. Unfortunately this did forget to handle SDIO_VENDOR_ID_TI differently between wl1251 and wl1271 with the result that although the wl1251 was found on the sdio bus, the firmware did not load any more and there was no interface registration. This patch defines separate constants to be used by sdio quirks and drivers. Fixes: 884f38607897 ("mmc: core: move some sdio IDs out of quirks file") Signed-off-by: H. Nikolaus Schaller Cc: # v4.11+ Signed-off-by: Ulf Hansson --- include/linux/mmc/sdio_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index d1a5d5df02f5..08b25c02b5a1 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -71,6 +71,8 @@ #define SDIO_VENDOR_ID_TI 0x0097 #define SDIO_DEVICE_ID_TI_WL1271 0x4076 +#define SDIO_VENDOR_ID_TI_WL1251 0x104c +#define SDIO_DEVICE_ID_TI_WL1251 0x9066 #define SDIO_VENDOR_ID_STE 0x0020 #define SDIO_DEVICE_ID_STE_CW1200 0x2280 -- cgit v1.2.3 From 4df4cb9e99f83b70d54bc0e25081ac23cceafcbc Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 13 Nov 2019 01:11:49 +0000 Subject: x86/hyperv: Initialize clockevents earlier in CPU onlining Hyper-V has historically initialized stimer-based clockevents late in the process of onlining a CPU because clockevents depend on stimer interrupts. In the original Hyper-V design, stimer interrupts generate a VMbus message, so the VMbus machinery must be running first, and VMbus can't be initialized until relatively late. On x86/64, LAPIC timer based clockevents are used during early initialization before VMbus and stimer-based clockevents are ready, and again during CPU offlining after the stimer clockevents have been shut down. Unfortunately, this design creates problems when offlining CPUs for hibernation or other purposes. stimer-based clockevents are shut down relatively early in the offlining process, so clockevents_unbind_device() must be used to fallback to the LAPIC-based clockevents for the remainder of the offlining process. Furthermore, the late initialization and early shutdown of stimer-based clockevents doesn't work well on ARM64 since there is no other timer like the LAPIC to fallback to. So CPU onlining and offlining doesn't work properly. Fix this by recognizing that stimer Direct Mode is the normal path for newer versions of Hyper-V on x86/64, and the only path on other architectures. With stimer Direct Mode, stimer interrupts don't require any VMbus machinery. stimer clockevents can be initialized and shut down consistent with how it is done for other clockevent devices. While the old VMbus-based stimer interrupts must still be supported for backward compatibility on x86, that mode of operation can be treated as legacy. So add a new Hyper-V stimer entry in the CPU hotplug state list, and use that new state when in Direct Mode. Update the Hyper-V clocksource driver to allocate and initialize stimer clockevents earlier during boot. Update Hyper-V initialization and the VMbus driver to use this new design. As a result, the LAPIC timer is no longer used during boot or CPU onlining/offlining and clockevents_unbind_device() is not called. But retain the old design as a legacy implementation for older versions of Hyper-V that don't support Direct Mode. Signed-off-by: Michael Kelley Signed-off-by: Thomas Gleixner Tested-by: Dexuan Cui Reviewed-by: Dexuan Cui Link: https://lkml.kernel.org/r/1573607467-9456-1-git-send-email-mikelley@microsoft.com --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 068793a619ca..4dcaea13b1f0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -129,6 +129,7 @@ enum cpuhp_state { CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, CPUHP_AP_CSKY_TIMER_STARTING, + CPUHP_AP_HYPERV_TIMER_STARTING, CPUHP_AP_KVM_STARTING, CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, CPUHP_AP_KVM_ARM_VGIC_STARTING, -- cgit v1.2.3 From 3ca270fc9edb258d5bfa271bcf851614e9e6e7d4 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sun, 27 Oct 2019 18:52:38 +0800 Subject: perf/core: Provide a kernel-internal interface to recalibrate event period Currently, perf_event_period() is used by user tools via ioctl. Based on naming convention, exporting perf_event_period() for kernel users (such as KVM) who may recalibrate the event period for their assigned counter according to their requirements. The perf_event_period() is an external accessor, just like the perf_event_{en,dis}able() and should thus use perf_event_ctx_lock(). Suggested-by: Kan Liang Signed-off-by: Like Xu Acked-by: Peter Zijlstra Signed-off-by: Paolo Bonzini --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 61448c19a132..d601df36e671 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1336,6 +1336,7 @@ extern void perf_event_disable_local(struct perf_event *event); extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); +extern int perf_event_period(struct perf_event *event, u64 value); #else /* !CONFIG_PERF_EVENTS: */ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, @@ -1415,6 +1416,10 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } +static inline int perf_event_period(struct perf_event *event, u64 value) +{ + return -EINVAL; +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) -- cgit v1.2.3 From 52ba4b0b99770e892f43da1238f437155acb8b58 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sun, 27 Oct 2019 18:52:39 +0800 Subject: perf/core: Provide a kernel-internal interface to pause perf_event Exporting perf_event_pause() as an external accessor for kernel users (such as KVM) who may do both disable perf_event and read count with just one time to hold perf_event_ctx_lock. Also the value could be reset optionally. Suggested-by: Peter Zijlstra Signed-off-by: Like Xu Acked-by: Peter Zijlstra Signed-off-by: Paolo Bonzini --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d601df36e671..e9768bfc76f6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1337,6 +1337,7 @@ extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); +extern u64 perf_event_pause(struct perf_event *event, bool reset); #else /* !CONFIG_PERF_EVENTS: */ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, @@ -1420,6 +1421,10 @@ static inline int perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; } +static inline u64 perf_event_pause(struct perf_event *event, bool reset) +{ + return 0; +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) -- cgit v1.2.3 From 8750e72a79dda2f665ce17b62049f4d62130d991 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 7 Nov 2019 07:53:42 -0500 Subject: KVM: remember position in kvm->vcpus array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fetching an index for any vcpu in kvm->vcpus array by traversing the entire array everytime is costly. This patch remembers the position of each vcpu in kvm->vcpus array by storing it in vcpus_idx under kvm_vcpu structure. Signed-off-by: Radim Krčmář Signed-off-by: Nitesh Narayan Lal Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a817e446c9aa..70b2296fb2ae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -266,7 +266,8 @@ struct kvm_vcpu { struct preempt_notifier preempt_notifier; #endif int cpu; - int vcpu_id; + int vcpu_id; /* id given by userspace at creation */ + int vcpu_idx; /* index in kvm->vcpus array */ int srcu_idx; int mode; u64 requests; @@ -570,13 +571,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *tmp; - int idx; - - kvm_for_each_vcpu(idx, tmp, vcpu->kvm) - if (tmp == vcpu) - return idx; - BUG(); + return vcpu->vcpu_idx; } #define kvm_for_each_memslot(memslot, slots) \ -- cgit v1.2.3 From 7ee30bc132c683d06a6d9e360e39e483e3990708 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Thu, 7 Nov 2019 07:53:43 -0500 Subject: KVM: x86: deliver KVM IOAPIC scan request to target vCPUs In IOAPIC fixed delivery mode instead of flushing the scan requests to all vCPUs, we should only send the requests to vCPUs specified within the destination field. This patch introduces kvm_get_dest_vcpus_mask() API which retrieves an array of target vCPUs by using kvm_apic_map_get_dest_lapic() and then based on the vcpus_idx, it sets the bit in a bitmap. However, if the above fails kvm_get_dest_vcpus_mask() finds the target vCPUs by traversing all available vCPUs. Followed by setting the bits in the bitmap. If we had different vCPUs in the previous request for the same redirection table entry then bits corresponding to these vCPUs are also set. This to done to keep ioapic_handled_vectors synchronized. This bitmap is then eventually passed on to kvm_make_vcpus_request_mask() to generate a masked request only for the target vCPUs. This would enable us to reduce the latency overhead on isolated vCPUs caused by the IPI to process due to KVM_REQ_IOAPIC_SCAN. Suggested-by: Marcelo Tosatti Signed-off-by: Nitesh Narayan Lal Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 70b2296fb2ae..bfe6c6729988 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -786,6 +786,8 @@ void kvm_reload_remote_mmus(struct kvm *kvm); bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap, cpumask_var_t tmp); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); +bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req, + unsigned long *vcpu_bitmap); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); -- cgit v1.2.3 From 2a785996cc5e2fc1d1d29d196f530905f68d2dc2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Nov 2019 11:10:01 +0100 Subject: y2038: uapi: change __kernel_time_t to __kernel_old_time_t This is mainly a patch for clarification, and to let us remove the time_t definition from the kernel to prevent new users from creeping in that might not be y2038-safe. All remaining uses of 'time_t' or '__kernel_time_t' are part of the user API that cannot be changed by that either have a replacement or that do not suffer from the y2038 overflow. Acked-by: Deepa Dinamani Acked-by: Christian Brauner Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 4 ++-- include/linux/time32.h | 2 +- include/linux/types.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f7c561c4dcdd..2f27bc9d5ef0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1076,7 +1076,7 @@ asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice); asmlinkage long sys_alarm(unsigned int seconds); asmlinkage long sys_getpgrp(void); asmlinkage long sys_pause(void); -asmlinkage long sys_time(time_t __user *tloc); +asmlinkage long sys_time(__kernel_old_time_t __user *tloc); asmlinkage long sys_time32(old_time32_t __user *tloc); #ifdef __ARCH_WANT_SYS_UTIME asmlinkage long sys_utime(char __user *filename, @@ -1116,7 +1116,7 @@ asmlinkage long sys_sysfs(int option, asmlinkage long sys_fork(void); /* obsolete: kernel/time/time.c */ -asmlinkage long sys_stime(time_t __user *tptr); +asmlinkage long sys_stime(__kernel_old_time_t __user *tptr); asmlinkage long sys_stime32(old_time32_t __user *tptr); /* obsolete: kernel/signal.c */ diff --git a/include/linux/time32.h b/include/linux/time32.h index 0a1f302a1753..cad4c3186002 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -12,7 +12,7 @@ #include #include -#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) +#define TIME_T_MAX (__kernel_old_time_t)((1UL << ((sizeof(__kernel_old_time_t) << 3) - 1)) - 1) typedef s32 old_time32_t; diff --git a/include/linux/types.h b/include/linux/types.h index 05030f608be3..e32c1180b742 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -67,7 +67,7 @@ typedef __kernel_ptrdiff_t ptrdiff_t; #ifndef _TIME_T #define _TIME_T -typedef __kernel_time_t time_t; +typedef __kernel_old_time_t time_t; #endif #ifndef _CLOCK_T -- cgit v1.2.3 From 75d319c06e6a76f67549c0ae1007dc3167804f4e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Oct 2019 22:56:17 +0200 Subject: y2038: syscalls: change remaining timeval to __kernel_old_timeval All of the remaining syscalls that pass a timeval (gettimeofday, utime, futimesat) can trivially be changed to pass a __kernel_old_timeval instead, which has a compatible layout, but avoids ambiguity with the timeval type in user space. Acked-by: Christian Brauner Acked-by: Rafael J. Wysocki Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2f27bc9d5ef0..e665920fa359 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -51,7 +51,7 @@ struct statx; struct __sysctl_args; struct sysinfo; struct timespec; -struct timeval; +struct __kernel_old_timeval; struct __kernel_timex; struct timezone; struct tms; @@ -732,7 +732,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache); /* kernel/time.c */ -asmlinkage long sys_gettimeofday(struct timeval __user *tv, +asmlinkage long sys_gettimeofday(struct __kernel_old_timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_settimeofday(struct timeval __user *tv, struct timezone __user *tz); @@ -1082,9 +1082,9 @@ asmlinkage long sys_time32(old_time32_t __user *tloc); asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); asmlinkage long sys_utimes(char __user *filename, - struct timeval __user *utimes); + struct __kernel_old_timeval __user *utimes); asmlinkage long sys_futimesat(int dfd, const char __user *filename, - struct timeval __user *utimes); + struct __kernel_old_timeval __user *utimes); #endif asmlinkage long sys_futimesat_time32(unsigned int dfd, const char __user *filename, @@ -1098,7 +1098,7 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user *dirent, unsigned int count); asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timeval __user *tvp); + fd_set __user *exp, struct __kernel_old_timeval __user *tvp); asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, int timeout); asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, -- cgit v1.2.3 From df1b4ba9d4a8454285c53c2ec7224228105bc5c8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Oct 2019 22:04:46 +0200 Subject: y2038: socket: use __kernel_old_timespec instead of timespec The 'timespec' type definition and helpers like ktime_to_timespec() or timespec64_to_timespec() should no longer be used in the kernel so we can remove them and avoid introducing y2038 issues in new code. Change the socket code that needs to pass a timespec to user space for backward compatibility to use __kernel_old_timespec instead. This type has the same layout but with a clearer defined name. Slightly reformat tcp_recv_timestamp() for consistency after the removal of timespec64_to_timespec(). Acked-by: Deepa Dinamani Signed-off-by: Arnd Bergmann --- include/linux/skbuff.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 64a395c7f689..6d64ffe92867 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3656,9 +3656,12 @@ static inline void skb_get_new_timestamp(const struct sk_buff *skb, } static inline void skb_get_timestampns(const struct sk_buff *skb, - struct timespec *stamp) + struct __kernel_old_timespec *stamp) { - *stamp = ktime_to_timespec(skb->tstamp); + struct timespec64 ts = ktime_to_timespec64(skb->tstamp); + + stamp->tv_sec = ts.tv_sec; + stamp->tv_nsec = ts.tv_nsec; } static inline void skb_get_new_timestampns(const struct sk_buff *skb, -- cgit v1.2.3 From 693737b6cc2edc258040b28462a90b63503394d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Nov 2017 14:22:06 +0100 Subject: y2038: make ns_to_compat_timeval use __kernel_old_timeval This gets us one step closer to removing 'struct timeval' from the kernel. We still keep __kernel_old_timeval for interfaces that we cannot fix otherwise, and ns_to_compat_timeval() is provably safe for interfaces that are legitimate users of __kernel_old_timeval on native kernels, so this is an obvious change. Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 16dafd9f4b86..3735a22bfbc0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -937,10 +937,10 @@ static inline bool in_compat_syscall(void) { return is_compat_task(); } */ static inline struct old_timeval32 ns_to_old_timeval32(s64 nsec) { - struct timeval tv; + struct __kernel_old_timeval tv; struct old_timeval32 ctv; - tv = ns_to_timeval(nsec); + tv = ns_to_kernel_old_timeval(nsec); ctv.tv_sec = tv.tv_sec; ctv.tv_usec = tv.tv_usec; -- cgit v1.2.3 From 5e0fb1b57bea8d11fe77da2bc80f4c9a67e28318 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 15 Aug 2018 20:04:11 +0200 Subject: y2038: time: avoid timespec usage in settimeofday() The compat_get_timeval() and timeval_valid() interfaces are deprecated and getting removed along with the definition of struct timeval itself. Change the two implementations of the settimeofday() system call to open-code these helpers and completely avoid references to timeval. The timeval_valid() call is not needed any more here, only a check to avoid overflowing tv_nsec during the multiplication, as there is another range check in do_sys_settimeofday64(). Tested-by: syzbot+dccce9b26ba09ca49966@syzkaller.appspotmail.com Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e665920fa359..d0391cc2dae9 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -734,7 +734,7 @@ asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct g /* kernel/time.c */ asmlinkage long sys_gettimeofday(struct __kernel_old_timeval __user *tv, struct timezone __user *tz); -asmlinkage long sys_settimeofday(struct timeval __user *tv, +asmlinkage long sys_settimeofday(struct __kernel_old_timeval __user *tv, struct timezone __user *tz); asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p); asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p); -- cgit v1.2.3 From c1745f84be2657f5702388133551b759b9237f59 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Oct 2019 10:46:22 +0200 Subject: y2038: itimer: compat handling to itimer.c The structure is only used in one place, moving it there simplifies the interface and helps with later changes to this code. Rename it to match the other time32 structures in the process. Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 3735a22bfbc0..906a0ea933cd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -116,14 +116,7 @@ typedef __compat_gid32_t compat_gid_t; struct compat_sel_arg_struct; struct rusage; -struct compat_itimerval { - struct old_timeval32 it_interval; - struct old_timeval32 it_value; -}; - -struct itimerval; -int get_compat_itimerval(struct itimerval *, const struct compat_itimerval __user *); -int put_compat_itimerval(struct compat_itimerval __user *, const struct itimerval *); +struct old_itimerval32; struct compat_tms { compat_clock_t tms_utime; @@ -668,10 +661,10 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, /* kernel/itimer.c */ asmlinkage long compat_sys_getitimer(int which, - struct compat_itimerval __user *it); + struct old_itimerval32 __user *it); asmlinkage long compat_sys_setitimer(int which, - struct compat_itimerval __user *in, - struct compat_itimerval __user *out); + struct old_itimerval32 __user *in, + struct old_itimerval32 __user *out); /* kernel/kexec.c */ asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, -- cgit v1.2.3 From ddbc7d0657e9fd38b69f16bd0310703367b52d29 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 Oct 2019 21:37:43 +0200 Subject: y2038: move itimer reset into itimer.c Preparing for a change to the itimer internals, stop using the do_setitimer() symbol and instead use a new higher-level interface. The do_getitimer()/do_setitimer functions can now be made static, allowing the compiler to potentially produce better object code. Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann --- include/linux/time.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 27d83fd2ae61..0760a4f5a15c 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -35,10 +35,11 @@ extern time64_t mktime64(const unsigned int year, const unsigned int mon, extern u32 (*arch_gettimeoffset)(void); #endif -struct itimerval; -extern int do_setitimer(int which, struct itimerval *value, - struct itimerval *ovalue); -extern int do_getitimer(int which, struct itimerval *value); +#ifdef CONFIG_POSIX_TIMERS +extern void clear_itimer(void); +#else +static inline void clear_itimer(void) {} +#endif extern long do_utimes(int dfd, const char __user *filename, struct timespec64 *times, int flags); -- cgit v1.2.3 From 3c845acd0237caef617f330a0e3b37ad8ae9fea5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 15 Nov 2019 11:22:10 +0100 Subject: jbd2: make jbd2_handle_buffer_credits() handle reserved handles The helper jbd2_handle_buffer_credits() doesn't correctly handle reserved handles which can lead to crashes. Fix it getting of journal pointer to work for reserved handles as well. Fixes: a9a8344ee171 ("ext4, jbd2: Provide accessor function for handle credits") Reported-by: Eric Biggers Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20191115102210.29445-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 587c146d3987..842b62606025 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1627,10 +1627,14 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) return tid; } - static inline int jbd2_handle_buffer_credits(handle_t *handle) { - journal_t *journal = handle->h_transaction->t_journal; + journal_t *journal; + + if (!handle->h_reserved) + journal = handle->h_transaction->t_journal; + else + journal = handle->h_journal; return handle->h_total_credits - DIV_ROUND_UP(handle->h_revoke_credits_requested, -- cgit v1.2.3 From 6718b6f855a0b4962d54bd625be2718cb820cec6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Oct 2019 16:47:32 +0100 Subject: pipe: Allow pipes to have kernel-reserved slots Split pipe->ring_size into two numbers: (1) pipe->ring_size - indicates the hard size of the pipe ring. (2) pipe->max_usage - indicates the maximum number of pipe ring slots that userspace orchestrated events can fill. This allows for a pipe that is both writable by the general kernel notification facility and by userspace, allowing plenty of ring space for notifications to be added whilst preventing userspace from being able to pin too much unswappable kernel space. Signed-off-by: David Howells --- include/linux/pipe_fs_i.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 96158ca80456..44f2245debda 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -32,6 +32,7 @@ struct pipe_buffer { * @wait: reader/writer wait point in case of empty/full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) * @tmp_page: cached released page * @readers: number of current readers of this pipe @@ -50,6 +51,7 @@ struct pipe_inode_info { wait_queue_head_t wait; unsigned int head; unsigned int tail; + unsigned int max_usage; unsigned int ring_size; unsigned int readers; unsigned int writers; @@ -150,9 +152,11 @@ static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int t unsigned int p_occupancy, p_space; p_occupancy = pipe_occupancy(head, tail); - if (p_occupancy >= pipe->ring_size) + if (p_occupancy >= pipe->max_usage) return 0; p_space = pipe->ring_size - p_occupancy; + if (p_space > pipe->max_usage) + p_space = pipe->max_usage; return p_space; } -- cgit v1.2.3 From d41efb522e902364ab09c782d511c1bedc388ddd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Nov 2019 22:30:52 -0500 Subject: fs/namei.c: pull positivity check into follow_managed() There are 4 callers; two proceed to check if result is positive and fail with ENOENT if it isn't; one (in handle_lookup_down()) is guaranteed to yield positive and one (in lookup_fast()) is _preceded_ by positivity check. However, follow_managed() on a negative dentry is a (fairly cheap) no-op on anything other than autofs. And negative autofs dentries are never hashed, so lookup_fast() is not going to run into one of those. Moreover, successful follow_managed() on a _positive_ dentry never yields a negative one (and we significantly rely upon that in callers of lookup_fast()). In other words, we can easily transpose the positivity check and the call of follow_managed() in lookup_fast(). And that allows to fold the positivity check *into* follow_managed(), simplifying life for the code downstream of its calls. Signed-off-by: Al Viro --- include/linux/dcache.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 10090f11ab95..c1488cc84fd9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -440,6 +440,11 @@ static inline bool d_is_negative(const struct dentry *dentry) return d_is_miss(dentry); } +static inline bool d_flags_negative(unsigned flags) +{ + return (flags & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE; +} + static inline bool d_is_positive(const struct dentry *dentry) { return !d_is_negative(dentry); -- cgit v1.2.3 From 6c2d4798a8d16cf4f3a28c3cd4af4f1dcbbb4d04 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 31 Oct 2019 01:21:58 -0400 Subject: new helper: lookup_positive_unlocked() Most of the callers of lookup_one_len_unlocked() treat negatives are ERR_PTR(-ENOENT). Provide a helper that would do just that. Note that a pinned positive dentry remains positive - it's ->d_inode is stable, etc.; a pinned _negative_ dentry can become positive at any point as long as you are not holding its parent at least shared. So using lookup_one_len_unlocked() needs to be careful; lookup_positive_unlocked() is safer and that's what the callers end up open-coding anyway. Signed-off-by: Al Viro --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 397a08ade6a2..7fe7b87a3ded 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -60,6 +60,7 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); +extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); extern int follow_down_one(struct path *); extern int follow_down(struct path *); -- cgit v1.2.3 From 9af433840b3f61ac30d569a85234ab8f210f813a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 9 Nov 2019 22:16:33 +0100 Subject: i2c: remove helpers for ref-counting clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no in-tree users of these helpers anymore, and there shouldn't. Most use cases went away once the driver model started to refcount for us. There have been users like the media subsystem, but they all switched to better refcounting methods meanwhile. Media did this in 2008. Last user (IPMI) left 2018. Remove this cruft. Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Reviewed-by: Jean Delvare Tested-by: Luca Ceresoli Reviewed-by: Luca Ceresoli Reviewed-by: Geert Uytterhoeven --- include/linux/i2c.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index aaf57d9b41db..88b825601f3d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -850,9 +850,6 @@ extern void i2c_del_driver(struct i2c_driver *driver); #define i2c_add_driver(driver) \ i2c_register_driver(THIS_MODULE, driver) -extern struct i2c_client *i2c_use_client(struct i2c_client *client); -extern void i2c_release_client(struct i2c_client *client); - /* call the i2c_client->command() of all attached clients with * the given arguments */ extern void i2c_clients_command(struct i2c_adapter *adap, -- cgit v1.2.3 From b7b3fc8dd95bc02bd30680da258e09dda55270db Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 15 Nov 2019 13:37:22 +0100 Subject: bpf: Support doubleword alignment in bpf_jit_binary_alloc Currently passing alignment greater than 4 to bpf_jit_binary_alloc does not work: in such cases it silently aligns only to 4 bytes. On s390, in order to load a constant from memory in a large (>512k) BPF program, one must use lgrl instruction, whose memory operand must be aligned on an 8-byte boundary. This patch makes it possible to request 8-byte alignment from bpf_jit_binary_alloc, and also makes it issue a warning when an unsupported alignment is requested. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191115123722.58462-1-iii@linux.ibm.com --- include/linux/filter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 7a6f8f6f1da4..ad80e9c6111c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -515,10 +515,12 @@ struct sock_fprog_kern { struct sock_filter *filter; }; +/* Some arches need doubleword alignment for their instructions and/or data */ +#define BPF_IMAGE_ALIGNMENT 8 + struct bpf_binary_header { u32 pages; - /* Some arches need word alignment for their instructions */ - u8 image[] __aligned(4); + u8 image[] __aligned(BPF_IMAGE_ALIGNMENT); }; struct bpf_prog { -- cgit v1.2.3 From 5964b2000f283ff5df366f718e0f083ebbaae977 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:03 -0800 Subject: bpf: Add bpf_arch_text_poke() helper Add bpf_arch_text_poke() helper that is used by BPF trampoline logic to patch nops/calls in kernel text into calls into BPF trampoline and to patch calls/nops inside BPF programs too. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191114185720.1641606-4-ast@kernel.org --- include/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7c7f518811a6..8b90db25348a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1157,4 +1157,12 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, } #endif /* CONFIG_INET */ +enum bpf_text_poke_type { + BPF_MOD_NOP_TO_CALL, + BPF_MOD_CALL_TO_CALL, + BPF_MOD_CALL_TO_NOP, +}; +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *addr1, void *addr2); + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From fec56f5890d93fc2ed74166c397dc186b1c25951 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:04 -0800 Subject: bpf: Introduce BPF trampoline Introduce BPF trampoline concept to allow kernel code to call into BPF programs with practically zero overhead. The trampoline generation logic is architecture dependent. It's converting native calling convention into BPF calling convention. BPF ISA is 64-bit (even on 32-bit architectures). The registers R1 to R5 are used to pass arguments into BPF functions. The main BPF program accepts only single argument "ctx" in R1. Whereas CPU native calling convention is different. x86-64 is passing first 6 arguments in registers and the rest on the stack. x86-32 is passing first 3 arguments in registers. sparc64 is passing first 6 in registers. And so on. The trampolines between BPF and kernel already exist. BPF_CALL_x macros in include/linux/filter.h statically compile trampolines from BPF into kernel helpers. They convert up to five u64 arguments into kernel C pointers and integers. On 64-bit architectures this BPF_to_kernel trampolines are nops. On 32-bit architecture they're meaningful. The opposite job kernel_to_BPF trampolines is done by CAST_TO_U64 macros and __bpf_trace_##call() shim functions in include/trace/bpf_probe.h. They convert kernel function arguments into array of u64s that BPF program consumes via R1=ctx pointer. This patch set is doing the same job as __bpf_trace_##call() static trampolines, but dynamically for any kernel function. There are ~22k global kernel functions that are attachable via nop at function entry. The function arguments and types are described in BTF. The job of btf_distill_func_proto() function is to extract useful information from BTF into "function model" that architecture dependent trampoline generators will use to generate assembly code to cast kernel function arguments into array of u64s. For example the kernel function eth_type_trans has two pointers. They will be casted to u64 and stored into stack of generated trampoline. The pointer to that stack space will be passed into BPF program in R1. On x86-64 such generated trampoline will consume 16 bytes of stack and two stores of %rdi and %rsi into stack. The verifier will make sure that only two u64 are accessed read-only by BPF program. The verifier will also recognize the precise type of the pointers being accessed and will not allow typecasting of the pointer to a different type within BPF program. The tracing use case in the datacenter demonstrated that certain key kernel functions have (like tcp_retransmit_skb) have 2 or more kprobes that are always active. Other functions have both kprobe and kretprobe. So it is essential to keep both kernel code and BPF programs executing at maximum speed. Hence generated BPF trampoline is re-generated every time new program is attached or detached to maintain maximum performance. To avoid the high cost of retpoline the attached BPF programs are called directly. __bpf_prog_enter/exit() are used to support per-program execution stats. In the future this logic will be optimized further by adding support for bpf_stats_enabled_key inside generated assembly code. Introduction of preemptible and sleepable BPF programs will completely remove the need to call to __bpf_prog_enter/exit(). Detach of a BPF program from the trampoline should not fail. To avoid memory allocation in detach path the half of the page is used as a reserve and flipped after each attach/detach. 2k bytes is enough to call 40+ BPF programs directly which is enough for BPF tracing use cases. This limit can be increased in the future. BPF_TRACE_FENTRY programs have access to raw kernel function arguments while BPF_TRACE_FEXIT programs have access to kernel return value as well. Often kprobe BPF program remembers function arguments in a map while kretprobe fetches arguments from a map and analyzes them together with return value. BPF_TRACE_FEXIT accelerates this typical use case. Recursion prevention for kprobe BPF programs is done via per-cpu bpf_prog_active counter. In practice that turned out to be a mistake. It caused programs to randomly skip execution. The tracing tools missed results they were looking for. Hence BPF trampoline doesn't provide builtin recursion prevention. It's a job of BPF program itself and will be addressed in the follow up patches. BPF trampoline is intended to be used beyond tracing and fentry/fexit use cases in the future. For example to remove retpoline cost from XDP programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-5-ast@kernel.org --- include/linux/bpf.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8b90db25348a..0d4c5c224d79 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -14,6 +14,8 @@ #include #include #include +#include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -384,6 +386,100 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +struct btf_func_model { + u8 ret_size; + u8 nr_args; + u8 arg_size[MAX_BPF_FUNC_ARGS]; +}; + +/* Restore arguments before returning from trampoline to let original function + * continue executing. This flag is used for fentry progs when there are no + * fexit progs. + */ +#define BPF_TRAMP_F_RESTORE_REGS BIT(0) +/* Call original function after fentry progs, but before fexit progs. + * Makes sense for fentry/fexit, normal calls and indirect calls. + */ +#define BPF_TRAMP_F_CALL_ORIG BIT(1) +/* Skip current frame and return to parent. Makes sense for fentry/fexit + * programs only. Should not be used with normal calls and indirect calls. + */ +#define BPF_TRAMP_F_SKIP_FRAME BIT(2) + +/* Different use cases for BPF trampoline: + * 1. replace nop at the function entry (kprobe equivalent) + * flags = BPF_TRAMP_F_RESTORE_REGS + * fentry = a set of programs to run before returning from trampoline + * + * 2. replace nop at the function entry (kprobe + kretprobe equivalent) + * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME + * orig_call = fentry_ip + MCOUNT_INSN_SIZE + * fentry = a set of program to run before calling original function + * fexit = a set of program to run after original function + * + * 3. replace direct call instruction anywhere in the function body + * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid) + * With flags = 0 + * fentry = a set of programs to run before returning from trampoline + * With flags = BPF_TRAMP_F_CALL_ORIG + * orig_call = original callback addr or direct function addr + * fentry = a set of program to run before calling original function + * fexit = a set of program to run after original function + */ +int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, + struct bpf_prog **fentry_progs, int fentry_cnt, + struct bpf_prog **fexit_progs, int fexit_cnt, + void *orig_call); +/* these two functions are called from generated trampoline */ +u64 notrace __bpf_prog_enter(void); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); + +enum bpf_tramp_prog_type { + BPF_TRAMP_FENTRY, + BPF_TRAMP_FEXIT, + BPF_TRAMP_MAX +}; + +struct bpf_trampoline { + /* hlist for trampoline_table */ + struct hlist_node hlist; + /* serializes access to fields of this trampoline */ + struct mutex mutex; + refcount_t refcnt; + u64 key; + struct { + struct btf_func_model model; + void *addr; + } func; + /* list of BPF programs using this trampoline */ + struct hlist_head progs_hlist[BPF_TRAMP_MAX]; + /* Number of attached programs. A counter per kind. */ + int progs_cnt[BPF_TRAMP_MAX]; + /* Executable image of trampoline */ + void *image; + u64 selector; +}; +#ifdef CONFIG_BPF_JIT +struct bpf_trampoline *bpf_trampoline_lookup(u64 key); +int bpf_trampoline_link_prog(struct bpf_prog *prog); +int bpf_trampoline_unlink_prog(struct bpf_prog *prog); +void bpf_trampoline_put(struct bpf_trampoline *tr); +#else +static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) +{ + return NULL; +} +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +{ + return -ENOTSUPP; +} +static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} +#endif + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -398,6 +494,9 @@ struct bpf_prog_aux { bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + enum bpf_tramp_prog_type trampoline_prog_type; + struct bpf_trampoline *trampoline; + struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -784,6 +883,12 @@ int btf_struct_access(struct bpf_verifier_log *log, u32 *next_btf_id); u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); +int btf_distill_func_proto(struct bpf_verifier_log *log, + struct btf *btf, + const struct btf_type *func_proto, + const char *func_name, + struct btf_func_model *m); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { -- cgit v1.2.3 From 9cc31b3a092d9bf2a18f09ad77e727ddb42a5b1e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:14 -0800 Subject: bpf: Fix race in btf_resolve_helper_id() btf_resolve_helper_id() caching logic is a bit racy, since under root the verifier can verify several programs in parallel. Fix it with READ/WRITE_ONCE. Fix the type as well, since error is also recorded. Fixes: a7658e1a4164 ("bpf: Check types of arguments passed into helpers") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191114185720.1641606-15-ast@kernel.org --- include/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0d4c5c224d79..cb5a356381f5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -248,7 +248,7 @@ struct bpf_func_proto { }; enum bpf_arg_type arg_type[5]; }; - u32 *btf_id; /* BTF ids of arguments */ + int *btf_id; /* BTF ids of arguments */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -881,7 +881,8 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); -u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); +int btf_resolve_helper_id(struct bpf_verifier_log *log, + const struct bpf_func_proto *fn, int); int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, -- cgit v1.2.3 From 91cc1a99740e2ed1d903b5906afb470cc5a07379 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:15 -0800 Subject: bpf: Annotate context types Annotate BPF program context types with program-side type and kernel-side type. This type information is used by the verifier. btf_get_prog_ctx_type() is used in the later patches to verify that BTF type of ctx in BPF program matches to kernel expected ctx type. For example, the XDP program type is: BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, struct xdp_md, struct xdp_buff) That means that XDP program should be written as: int xdp_prog(struct xdp_md *ctx) { ... } Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-16-ast@kernel.org --- include/linux/bpf.h | 11 ++++++- include/linux/bpf_types.h | 78 +++++++++++++++++++++++++++++++---------------- 2 files changed, 62 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cb5a356381f5..9c48f11fe56e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -747,7 +747,7 @@ DECLARE_PER_CPU(int, bpf_prog_active); extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; -#define BPF_PROG_TYPE(_id, _name) \ +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ @@ -1213,6 +1213,15 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, #endif #ifdef CONFIG_INET +struct sk_reuseport_kern { + struct sk_buff *skb; + struct sock *sk; + struct sock *selected_sk; + void *data_end; + u32 hash; + u32 reuseport_id; + bool bind_inany; +}; bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index de14872b01ba..93740b3614d7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -2,42 +2,68 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp, + struct xdp_md, struct xdp_buff) #ifdef CONFIG_CGROUP_BPF -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, + struct bpf_sock, struct sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, + struct bpf_sock_addr, struct bpf_sock_addr_kern) #endif -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) -BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops, + struct bpf_sock_ops, struct bpf_sock_ops_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb, + struct __sk_buff, struct sk_buff) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg, + struct sk_msg_md, struct sk_msg) +BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector, + struct __sk_buff, struct bpf_flow_dissector) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) -BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) -BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe, + bpf_user_pt_regs_t, struct pt_regs) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint, + __u64, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event, + struct bpf_perf_event_data, struct bpf_perf_event_data_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint, + struct bpf_raw_tracepoint_args, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable, + struct bpf_raw_tracepoint_args, u64) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing, + void *, void *) #endif #ifdef CONFIG_CGROUP_BPF -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev, + struct bpf_cgroup_dev_ctx, struct bpf_cgroup_dev_ctx) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl, + struct bpf_sysctl, struct bpf_sysctl_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt, + struct bpf_sockopt, struct bpf_sockopt_kern) #endif #ifdef CONFIG_BPF_LIRC_MODE2 -BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2) +BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, + __u32, u32) #endif #ifdef CONFIG_INET -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, + struct sk_reuseport_md, struct sk_reuseport_kern) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) -- cgit v1.2.3 From 8c1b6e69dcc1e11bd24111e3734dd740aaf3fda1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:16 -0800 Subject: bpf: Compare BTF types of functions arguments with actual types Make the verifier check that BTF types of function arguments match actual types passed into top-level BPF program and into BPF-to-BPF calls. If types match such BPF programs and sub-programs will have full support of BPF trampoline. If types mismatch the trampoline has to be conservative. It has to save/restore five program arguments and assume 64-bit scalars. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191114185720.1641606-17-ast@kernel.org --- include/linux/bpf.h | 8 ++++++++ include/linux/bpf_verifier.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9c48f11fe56e..c70bf04726b4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -480,6 +480,10 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #endif +struct bpf_func_info_aux { + bool unreliable; +}; + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -494,6 +498,7 @@ struct bpf_prog_aux { bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + bool func_proto_unreliable; enum bpf_tramp_prog_type trampoline_prog_type; struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; @@ -518,6 +523,7 @@ struct bpf_prog_aux { struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; + struct bpf_func_info_aux *func_info_aux; /* bpf_line_info loaded from userspace. linfo->insn_off * has the xlated insn offset. * Both the main and sub prog share the same linfo. @@ -890,6 +896,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); +int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6e7284ea1468..cdd08bf0ec06 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -343,6 +343,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_info { + /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ -- cgit v1.2.3 From 5b92a28aae4dd0f88778d540ecfdcdaec5a41723 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 14 Nov 2019 10:57:17 -0800 Subject: bpf: Support attaching tracing BPF program to other BPF programs Allow FENTRY/FEXIT BPF programs to attach to other BPF programs of any type including their subprograms. This feature allows snooping on input and output packets in XDP, TC programs including their return values. In order to do that the verifier needs to track types not only of vmlinux, but types of other BPF programs as well. The verifier also needs to translate uapi/linux/bpf.h types used by networking programs into kernel internal BTF types used by FENTRY/FEXIT BPF programs. In some cases LLVM optimizations can remove arguments from BPF subprograms without adjusting BTF info that LLVM backend knows. When BTF info disagrees with actual types that the verifiers sees the BPF trampoline has to fallback to conservative and treat all arguments as u64. The FENTRY/FEXIT program can still attach to such subprograms, but it won't be able to recognize pointer types like 'struct sk_buff *' and it won't be able to pass them to bpf_skb_output() for dumping packets to user space. The FENTRY/FEXIT program would need to use bpf_probe_read_kernel() instead. The BPF_PROG_LOAD command is extended with attach_prog_fd field. When it's set to zero the attach_btf_id is one vmlinux BTF type ids. When attach_prog_fd points to previously loaded BPF program the attach_btf_id is BTF type id of main function or one of its subprograms. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191114185720.1641606-18-ast@kernel.org --- include/linux/bpf.h | 1 + include/linux/btf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c70bf04726b4..5b81cde47314 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -495,6 +495,7 @@ struct bpf_prog_aux { u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 9dee00859c5f..79d4abc2556a 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -88,6 +88,7 @@ static inline bool btf_type_is_func_proto(const struct btf_type *t) const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); +struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) -- cgit v1.2.3 From 49cb2fc42ce4b7a656ee605e30c302efaa39c1a7 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 15 Nov 2019 13:36:20 +0100 Subject: fork: extend clone3() to support setting a PID The main motivation to add set_tid to clone3() is CRIU. To restore a process with the same PID/TID CRIU currently uses /proc/sys/kernel/ns_last_pid. It writes the desired (PID - 1) to ns_last_pid and then (quickly) does a clone(). This works most of the time, but it is racy. It is also slow as it requires multiple syscalls. Extending clone3() to support *set_tid makes it possible restore a process using CRIU without accessing /proc/sys/kernel/ns_last_pid and race free (as long as the desired PID/TID is available). This clone3() extension places the same restrictions (CAP_SYS_ADMIN) on clone3() with *set_tid as they are currently in place for ns_last_pid. The original version of this change was using a single value for set_tid. At the 2019 LPC, after presenting set_tid, it was, however, decided to change set_tid to an array to enable setting the PID of a process in multiple PID namespaces at the same time. If a process is created in a PID namespace it is possible to influence the PID inside and outside of the PID namespace. Details also in the corresponding selftest. To create a process with the following PIDs: PID NS level Requested PID 0 (host) 31496 1 42 2 1 For that example the two newly introduced parameters to struct clone_args (set_tid and set_tid_size) would need to be: set_tid[0] = 1; set_tid[1] = 42; set_tid[2] = 31496; set_tid_size = 3; If only the PIDs of the two innermost nested PID namespaces should be defined it would look like this: set_tid[0] = 1; set_tid[1] = 42; set_tid_size = 2; The PID of the newly created process would then be the next available free PID in the PID namespace level 0 (host) and 42 in the PID namespace at level 1 and the PID of the process in the innermost PID namespace would be 1. The set_tid array is used to specify the PID of a process starting from the innermost nested PID namespaces up to set_tid_size PID namespaces. set_tid_size cannot be larger then the current PID namespace level. Signed-off-by: Adrian Reber Reviewed-by: Christian Brauner Reviewed-by: Oleg Nesterov Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Acked-by: Andrei Vagin Link: https://lore.kernel.org/r/20191115123621.142252-1-areber@redhat.com Signed-off-by: Christian Brauner --- include/linux/pid.h | 3 ++- include/linux/pid_namespace.h | 2 ++ include/linux/sched/task.h | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index 034e3cd60dc0..998ae7d24450 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -124,7 +124,8 @@ extern struct pid *find_vpid(int nr); extern struct pid *find_get_pid(int nr); extern struct pid *find_ge_pid(int nr, struct pid_namespace *); -extern struct pid *alloc_pid(struct pid_namespace *ns); +extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, + size_t set_tid_size); extern void free_pid(struct pid *pid); extern void disable_pid_allocation(struct pid_namespace *ns); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 49538b172483..2ed6af88794b 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -12,6 +12,8 @@ #include #include +/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ +#define MAX_PID_NS_LEVEL 32 struct fs_pin; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4b1c3b664f51..f1879884238e 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -26,6 +26,9 @@ struct kernel_clone_args { unsigned long stack; unsigned long stack_size; unsigned long tls; + pid_t *set_tid; + /* Number of elements in *set_tid */ + size_t set_tid_size; }; /* -- cgit v1.2.3 From d46bca2b5d06cbb5f3e66945080f275bcfab7181 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 15 Nov 2019 15:11:44 -0700 Subject: irqdomain: Add bus token DOMAIN_BUS_WAKEUP A single controller can handle normal interrupts and wake-up interrupts independently, with a different numbering space. It is thus crucial to allow the driver for such a controller discriminate between the two. A simple way to do so is to tag the wake-up irqdomain with a "bus token" that indicates the wake-up domain. This slightly abuses the notion of bus, but also radically simplifies the design of such a driver. Between two evils, we choose the least damaging. Suggested-by: Stephen Boyd Signed-off-by: Lina Iyer Signed-off-by: Marc Zyngier Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1573855915-9841-2-git-send-email-ilina@codeaurora.org --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 583e7abd07f9..3c340dbc5a1f 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -83,6 +83,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_IPI, DOMAIN_BUS_FSL_MC_MSI, DOMAIN_BUS_TI_SCI_INTA_MSI, + DOMAIN_BUS_WAKEUP, }; /** -- cgit v1.2.3 From 4a169a95d885fe5c050bac1a21d43c86ba955bcf Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Fri, 15 Nov 2019 15:11:49 -0700 Subject: genirq: Introduce irq_chip_get/set_parent_state calls On certain QTI chipsets some GPIOs are direct-connect interrupts to the GIC to be used as regular interrupt lines. When the GPIOs are not used for interrupt generation the interrupt line is disabled. But disabling the interrupt at GIC does not prevent the interrupt to be reported as pending at GIC_ISPEND. Later, when drivers call enable_irq() on the interrupt, an unwanted interrupt occurs. Introduce get and set methods for irqchip's parent to clear it's pending irq state. This then can be invoked by the GPIO interrupt controller on the parents in it hierarchy to clear the interrupt before enabling the interrupt. Signed-off-by: Maulik Shah Signed-off-by: Lina Iyer Signed-off-by: Marc Zyngier Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1573855915-9841-7-git-send-email-ilina@codeaurora.org [updated commit text and minor code fixes] --- include/linux/irq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index fb301cf29148..7853eb9301f2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -610,6 +610,12 @@ extern int irq_chip_pm_put(struct irq_data *data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY extern void handle_fasteoi_ack_irq(struct irq_desc *desc); extern void handle_fasteoi_mask_irq(struct irq_desc *desc); +extern int irq_chip_set_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool val); +extern int irq_chip_get_parent_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state); extern void irq_chip_enable_parent(struct irq_data *data); extern void irq_chip_disable_parent(struct irq_data *data); extern void irq_chip_ack_parent(struct irq_data *data); -- cgit v1.2.3 From 81ef8bf88065b07d597c723ca5b0f1f10a808de4 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 15 Nov 2019 15:11:47 -0700 Subject: irqchip/qcom-pdc: Add irqdomain for wakeup capable GPIOs Introduce a new domain for wakeup capable GPIOs. The domain can be requested using the bus token DOMAIN_BUS_WAKEUP. In the following patches, we will specify PDC as the wakeup-parent for the TLMM GPIO irqchip. Requesting a wakeup GPIO will setup the GPIO and the corresponding PDC interrupt as its parent. Co-developed-by: Stephen Boyd Signed-off-by: Stephen Boyd Signed-off-by: Lina Iyer Signed-off-by: Marc Zyngier Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1573855915-9841-5-git-send-email-ilina@codeaurora.org --- include/linux/soc/qcom/irq.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/soc/qcom/irq.h (limited to 'include/linux') diff --git a/include/linux/soc/qcom/irq.h b/include/linux/soc/qcom/irq.h new file mode 100644 index 000000000000..637c0bfa89e7 --- /dev/null +++ b/include/linux/soc/qcom/irq.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __QCOM_IRQ_H +#define __QCOM_IRQ_H + +#include + +#define GPIO_NO_WAKE_IRQ ~0U + +/** + * QCOM specific IRQ domain flags that distinguishes the handling of wakeup + * capable interrupts by different interrupt controllers. + * + * IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP: Line must be masked at TLMM and the + * interrupt configuration is done at PDC + * IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP: Interrupt configuration is handled at TLMM + */ +#define IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP (IRQ_DOMAIN_FLAG_NONCORE << 0) +#define IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP (IRQ_DOMAIN_FLAG_NONCORE << 1) + +#endif -- cgit v1.2.3 From e35a6ae0eb3a7cc451e8d8db55e9b938a95de416 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 15 Nov 2019 15:11:51 -0700 Subject: pinctrl/msm: Setup GPIO chip in hierarchy Some GPIOs are marked as wakeup capable and are routed to another interrupt controller that is an always-domain and can detect interrupts even when most of the SoC is powered off. The wakeup interrupt controller wakes up the GIC and replays the interrupt at the GIC. Setup the TLMM irqchip in hierarchy with the wakeup interrupt controller and ensure the wakeup GPIOs are handled correctly. Co-developed-by: Maulik Shah Signed-off-by: Lina Iyer Signed-off-by: Marc Zyngier Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/1573855915-9841-9-git-send-email-ilina@codeaurora.org ---- Changes in v2: - Address review comments - Fix Co-developed-by tag Changes in v1: - Address minor review comments - Remove redundant call to set irq handler - Move irq_domain_qcom_handle_wakeup() to this patch Changes in RFC v2: - Rebase on top of GPIO hierarchy support in linux-next - Set the chained irq handler for summary line --- include/linux/soc/qcom/irq.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/irq.h b/include/linux/soc/qcom/irq.h index 637c0bfa89e7..9e1ece58e55b 100644 --- a/include/linux/soc/qcom/irq.h +++ b/include/linux/soc/qcom/irq.h @@ -18,4 +18,17 @@ #define IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP (IRQ_DOMAIN_FLAG_NONCORE << 0) #define IRQ_DOMAIN_FLAG_QCOM_MPM_WAKEUP (IRQ_DOMAIN_FLAG_NONCORE << 1) +/** + * irq_domain_qcom_handle_wakeup: Return if the domain handles interrupt + * configuration + * @d: irq domain + * + * This QCOM specific irq domain call returns if the interrupt controller + * requires the interrupt be masked at the child interrupt controller. + */ +static inline bool irq_domain_qcom_handle_wakeup(const struct irq_domain *d) +{ + return (d->flags & IRQ_DOMAIN_FLAG_QCOM_PDC_WAKEUP); +} + #endif -- cgit v1.2.3 From a079973f462a3d506c6a7f00c770a55b167ed094 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 14 Nov 2019 12:18:40 +0100 Subject: usb: typec: tcpm: Remove tcpc_config configuration mechanism All configuration can and should be done through fwnodes instead of through the tcpc_config struct and there are no existing users left of struct tcpc_config, so lets remove it. Signed-off-by: Hans de Goede Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20191114111840.40876-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index f516955a0cf4..e7979c01c351 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -46,45 +46,6 @@ enum tcpm_transmit_type { TCPC_TX_BIST_MODE_2 = 7 }; -/** - * struct tcpc_config - Port configuration - * @src_pdo: PDO parameters sent to port partner as response to - * PD_CTRL_GET_SOURCE_CAP message - * @nr_src_pdo: Number of entries in @src_pdo - * @snk_pdo: PDO parameters sent to partner as response to - * PD_CTRL_GET_SINK_CAP message - * @nr_snk_pdo: Number of entries in @snk_pdo - * @operating_snk_mw: - * Required operating sink power in mW - * @type: Port type (TYPEC_PORT_DFP, TYPEC_PORT_UFP, or - * TYPEC_PORT_DRP) - * @default_role: - * Default port role (TYPEC_SINK or TYPEC_SOURCE). - * Set to TYPEC_NO_PREFERRED_ROLE if no default role. - * @try_role_hw:True if try.{Src,Snk} is implemented in hardware - * @alt_modes: List of supported alternate modes - */ -struct tcpc_config { - const u32 *src_pdo; - unsigned int nr_src_pdo; - - const u32 *snk_pdo; - unsigned int nr_snk_pdo; - - const u32 *snk_vdo; - unsigned int nr_snk_vdo; - - unsigned int operating_snk_mw; - - enum typec_port_type type; - enum typec_port_data data; - enum typec_role default_role; - bool try_role_hw; /* try.{src,snk} implemented in hardware */ - bool self_powered; /* port belongs to a self powered device */ - - const struct typec_altmode_desc *alt_modes; -}; - /* Mux state attributes */ #define TCPC_MUX_USB_ENABLED BIT(0) /* USB enabled */ #define TCPC_MUX_DP_ENABLED BIT(1) /* DP enabled */ @@ -92,7 +53,6 @@ struct tcpc_config { /** * struct tcpc_dev - Port configuration and callback functions - * @config: Pointer to port configuration * @fwnode: Pointer to port fwnode * @get_vbus: Called to read current VBUS state * @get_current_limit: @@ -121,7 +81,6 @@ struct tcpc_config { * @mux: Pointer to multiplexer data */ struct tcpc_dev { - const struct tcpc_config *config; struct fwnode_handle *fwnode; int (*init)(struct tcpc_dev *dev); -- cgit v1.2.3 From d63007eb954e425f45766482813738f41db9af45 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 9 Nov 2019 18:09:53 +0100 Subject: crypto: ablkcipher - remove deprecated and unused ablkcipher support Now that all users of the deprecated ablkcipher interface have been moved to the skcipher interface, ablkcipher is no longer used and can be removed. Reviewed-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- include/linux/crypto.h | 435 ------------------------------------------------- 1 file changed, 435 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index e9f2c6b5d800..23365a9d062e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -41,7 +41,6 @@ #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 #define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 -#define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_KPP 0x00000008 #define CRYPTO_ALG_TYPE_ACOMPRESS 0x0000000a @@ -137,7 +136,6 @@ #define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) struct scatterlist; -struct crypto_ablkcipher; struct crypto_async_request; struct crypto_tfm; struct crypto_type; @@ -160,19 +158,6 @@ struct crypto_async_request { u32 flags; }; -struct ablkcipher_request { - struct crypto_async_request base; - - unsigned int nbytes; - - void *info; - - struct scatterlist *src; - struct scatterlist *dst; - - void *__ctx[] CRYPTO_MINALIGN_ATTR; -}; - /** * DOC: Block Cipher Algorithm Definitions * @@ -180,57 +165,6 @@ struct ablkcipher_request { * managed via crypto_register_alg() and crypto_unregister_alg(). */ -/** - * struct ablkcipher_alg - asynchronous block cipher definition - * @min_keysize: Minimum key size supported by the transformation. This is the - * smallest key length supported by this transformation algorithm. - * This must be set to one of the pre-defined values as this is - * not hardware specific. Possible values for this field can be - * found via git grep "_MIN_KEY_SIZE" include/crypto/ - * @max_keysize: Maximum key size supported by the transformation. This is the - * largest key length supported by this transformation algorithm. - * This must be set to one of the pre-defined values as this is - * not hardware specific. Possible values for this field can be - * found via git grep "_MAX_KEY_SIZE" include/crypto/ - * @setkey: Set key for the transformation. This function is used to either - * program a supplied key into the hardware or store the key in the - * transformation context for programming it later. Note that this - * function does modify the transformation context. This function can - * be called multiple times during the existence of the transformation - * object, so one must make sure the key is properly reprogrammed into - * the hardware. This function is also responsible for checking the key - * length for validity. In case a software fallback was put in place in - * the @cra_init call, this function might need to use the fallback if - * the algorithm doesn't support all of the key sizes. - * @encrypt: Encrypt a scatterlist of blocks. This function is used to encrypt - * the supplied scatterlist containing the blocks of data. The crypto - * API consumer is responsible for aligning the entries of the - * scatterlist properly and making sure the chunks are correctly - * sized. In case a software fallback was put in place in the - * @cra_init call, this function might need to use the fallback if - * the algorithm doesn't support all of the key sizes. In case the - * key was stored in transformation context, the key might need to be - * re-programmed into the hardware in this function. This function - * shall not modify the transformation context, as this function may - * be called in parallel with the same transformation object. - * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt - * and the conditions are exactly the same. - * @ivsize: IV size applicable for transformation. The consumer must provide an - * IV of exactly that size to perform the encrypt or decrypt operation. - * - * All fields except @ivsize are mandatory and must be filled. - */ -struct ablkcipher_alg { - int (*setkey)(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct ablkcipher_request *req); - int (*decrypt)(struct ablkcipher_request *req); - - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; -}; - /** * struct cipher_alg - single-block symmetric ciphers definition * @cia_min_keysize: Minimum key size supported by the transformation. This is @@ -415,7 +349,6 @@ struct crypto_istat_rng { }; #endif /* CONFIG_CRYPTO_STATS */ -#define cra_ablkcipher cra_u.ablkcipher #define cra_cipher cra_u.cipher #define cra_compress cra_u.compress @@ -483,8 +416,6 @@ struct crypto_istat_rng { * @cra_exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @cra_init, used to remove various changes set in * @cra_init. - * @cra_u.ablkcipher: Union member which contains an asynchronous block cipher - * definition. See @struct @ablkcipher_alg. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. * @cra_u.compress: Union member which contains a (de)compression algorithm. @@ -526,7 +457,6 @@ struct crypto_alg { const struct crypto_type *cra_type; union { - struct ablkcipher_alg ablkcipher; struct cipher_alg cipher; struct compress_alg compress; } cra_u; @@ -554,8 +484,6 @@ struct crypto_alg { #ifdef CONFIG_CRYPTO_STATS void crypto_stats_init(struct crypto_alg *alg); void crypto_stats_get(struct crypto_alg *alg); -void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); -void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg); void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret); void crypto_stats_ahash_update(unsigned int nbytes, int ret, struct crypto_alg *alg); @@ -578,10 +506,6 @@ static inline void crypto_stats_init(struct crypto_alg *alg) {} static inline void crypto_stats_get(struct crypto_alg *alg) {} -static inline void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) -{} -static inline void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret, struct crypto_alg *alg) -{} static inline void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) {} static inline void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg, int ret) @@ -675,18 +599,6 @@ int crypto_has_alg(const char *name, u32 type, u32 mask); * crypto_free_*(), as well as the various helpers below. */ -struct ablkcipher_tfm { - int (*setkey)(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct ablkcipher_request *req); - int (*decrypt)(struct ablkcipher_request *req); - - struct crypto_ablkcipher *base; - - unsigned int ivsize; - unsigned int reqsize; -}; - struct cipher_tfm { int (*cit_setkey)(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); @@ -703,7 +615,6 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; -#define crt_ablkcipher crt_u.ablkcipher #define crt_cipher crt_u.cipher #define crt_compress crt_u.compress @@ -712,7 +623,6 @@ struct crypto_tfm { u32 crt_flags; union { - struct ablkcipher_tfm ablkcipher; struct cipher_tfm cipher; struct compress_tfm compress; } crt_u; @@ -724,10 +634,6 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; -struct crypto_ablkcipher { - struct crypto_tfm base; -}; - struct crypto_cipher { struct crypto_tfm base; }; @@ -835,347 +741,6 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -/* - * API wrappers. - */ -static inline struct crypto_ablkcipher *__crypto_ablkcipher_cast( - struct crypto_tfm *tfm) -{ - return (struct crypto_ablkcipher *)tfm; -} - -/** - * DOC: Asynchronous Block Cipher API - * - * Asynchronous block cipher API is used with the ciphers of type - * CRYPTO_ALG_TYPE_ABLKCIPHER (listed as type "ablkcipher" in /proc/crypto). - * - * Asynchronous cipher operations imply that the function invocation for a - * cipher request returns immediately before the completion of the operation. - * The cipher request is scheduled as a separate kernel thread and therefore - * load-balanced on the different CPUs via the process scheduler. To allow - * the kernel crypto API to inform the caller about the completion of a cipher - * request, the caller must provide a callback function. That function is - * invoked with the cipher handle when the request completes. - * - * To support the asynchronous operation, additional information than just the - * cipher handle must be supplied to the kernel crypto API. That additional - * information is given by filling in the ablkcipher_request data structure. - * - * For the asynchronous block cipher API, the state is maintained with the tfm - * cipher handle. A single tfm can be used across multiple calls and in - * parallel. For asynchronous block cipher calls, context data supplied and - * only used by the caller can be referenced the request data structure in - * addition to the IV used for the cipher request. The maintenance of such - * state information would be important for a crypto driver implementer to - * have, because when calling the callback function upon completion of the - * cipher operation, that callback function may need some information about - * which operation just finished if it invoked multiple in parallel. This - * state information is unused by the kernel crypto API. - */ - -static inline struct crypto_tfm *crypto_ablkcipher_tfm( - struct crypto_ablkcipher *tfm) -{ - return &tfm->base; -} - -/** - * crypto_free_ablkcipher() - zeroize and free cipher handle - * @tfm: cipher handle to be freed - */ -static inline void crypto_free_ablkcipher(struct crypto_ablkcipher *tfm) -{ - crypto_free_tfm(crypto_ablkcipher_tfm(tfm)); -} - -static inline struct ablkcipher_tfm *crypto_ablkcipher_crt( - struct crypto_ablkcipher *tfm) -{ - return &crypto_ablkcipher_tfm(tfm)->crt_ablkcipher; -} - -/** - * crypto_ablkcipher_ivsize() - obtain IV size - * @tfm: cipher handle - * - * The size of the IV for the ablkcipher referenced by the cipher handle is - * returned. This IV size may be zero if the cipher does not need an IV. - * - * Return: IV size in bytes - */ -static inline unsigned int crypto_ablkcipher_ivsize( - struct crypto_ablkcipher *tfm) -{ - return crypto_ablkcipher_crt(tfm)->ivsize; -} - -/** - * crypto_ablkcipher_blocksize() - obtain block size of cipher - * @tfm: cipher handle - * - * The block size for the ablkcipher referenced with the cipher handle is - * returned. The caller may use that information to allocate appropriate - * memory for the data returned by the encryption or decryption operation - * - * Return: block size of cipher - */ -static inline unsigned int crypto_ablkcipher_blocksize( - struct crypto_ablkcipher *tfm) -{ - return crypto_tfm_alg_blocksize(crypto_ablkcipher_tfm(tfm)); -} - -static inline unsigned int crypto_ablkcipher_alignmask( - struct crypto_ablkcipher *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_ablkcipher_tfm(tfm)); -} - -static inline u32 crypto_ablkcipher_get_flags(struct crypto_ablkcipher *tfm) -{ - return crypto_tfm_get_flags(crypto_ablkcipher_tfm(tfm)); -} - -static inline void crypto_ablkcipher_set_flags(struct crypto_ablkcipher *tfm, - u32 flags) -{ - crypto_tfm_set_flags(crypto_ablkcipher_tfm(tfm), flags); -} - -static inline void crypto_ablkcipher_clear_flags(struct crypto_ablkcipher *tfm, - u32 flags) -{ - crypto_tfm_clear_flags(crypto_ablkcipher_tfm(tfm), flags); -} - -/** - * crypto_ablkcipher_setkey() - set key for cipher - * @tfm: cipher handle - * @key: buffer holding the key - * @keylen: length of the key in bytes - * - * The caller provided key is set for the ablkcipher referenced by the cipher - * handle. - * - * Note, the key length determines the cipher type. Many block ciphers implement - * different cipher modes depending on the key size, such as AES-128 vs AES-192 - * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 - * is performed. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -static inline int crypto_ablkcipher_setkey(struct crypto_ablkcipher *tfm, - const u8 *key, unsigned int keylen) -{ - struct ablkcipher_tfm *crt = crypto_ablkcipher_crt(tfm); - - return crt->setkey(crt->base, key, keylen); -} - -/** - * crypto_ablkcipher_reqtfm() - obtain cipher handle from request - * @req: ablkcipher_request out of which the cipher handle is to be obtained - * - * Return the crypto_ablkcipher handle when furnishing an ablkcipher_request - * data structure. - * - * Return: crypto_ablkcipher handle - */ -static inline struct crypto_ablkcipher *crypto_ablkcipher_reqtfm( - struct ablkcipher_request *req) -{ - return __crypto_ablkcipher_cast(req->base.tfm); -} - -/** - * crypto_ablkcipher_encrypt() - encrypt plaintext - * @req: reference to the ablkcipher_request handle that holds all information - * needed to perform the cipher operation - * - * Encrypt plaintext data using the ablkcipher_request handle. That data - * structure and how it is filled with data is discussed with the - * ablkcipher_request_* functions. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_ablkcipher_encrypt(struct ablkcipher_request *req) -{ - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); - struct crypto_alg *alg = crt->base->base.__crt_alg; - unsigned int nbytes = req->nbytes; - int ret; - - crypto_stats_get(alg); - ret = crt->encrypt(req); - crypto_stats_ablkcipher_encrypt(nbytes, ret, alg); - return ret; -} - -/** - * crypto_ablkcipher_decrypt() - decrypt ciphertext - * @req: reference to the ablkcipher_request handle that holds all information - * needed to perform the cipher operation - * - * Decrypt ciphertext data using the ablkcipher_request handle. That data - * structure and how it is filled with data is discussed with the - * ablkcipher_request_* functions. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_ablkcipher_decrypt(struct ablkcipher_request *req) -{ - struct ablkcipher_tfm *crt = - crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req)); - struct crypto_alg *alg = crt->base->base.__crt_alg; - unsigned int nbytes = req->nbytes; - int ret; - - crypto_stats_get(alg); - ret = crt->decrypt(req); - crypto_stats_ablkcipher_decrypt(nbytes, ret, alg); - return ret; -} - -/** - * DOC: Asynchronous Cipher Request Handle - * - * The ablkcipher_request data structure contains all pointers to data - * required for the asynchronous cipher operation. This includes the cipher - * handle (which can be used by multiple ablkcipher_request instances), pointer - * to plaintext and ciphertext, asynchronous callback function, etc. It acts - * as a handle to the ablkcipher_request_* API calls in a similar way as - * ablkcipher handle to the crypto_ablkcipher_* API calls. - */ - -/** - * crypto_ablkcipher_reqsize() - obtain size of the request data structure - * @tfm: cipher handle - * - * Return: number of bytes - */ -static inline unsigned int crypto_ablkcipher_reqsize( - struct crypto_ablkcipher *tfm) -{ - return crypto_ablkcipher_crt(tfm)->reqsize; -} - -/** - * ablkcipher_request_set_tfm() - update cipher handle reference in request - * @req: request handle to be modified - * @tfm: cipher handle that shall be added to the request handle - * - * Allow the caller to replace the existing ablkcipher handle in the request - * data structure with a different one. - */ -static inline void ablkcipher_request_set_tfm( - struct ablkcipher_request *req, struct crypto_ablkcipher *tfm) -{ - req->base.tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_crt(tfm)->base); -} - -static inline struct ablkcipher_request *ablkcipher_request_cast( - struct crypto_async_request *req) -{ - return container_of(req, struct ablkcipher_request, base); -} - -/** - * ablkcipher_request_alloc() - allocate request data structure - * @tfm: cipher handle to be registered with the request - * @gfp: memory allocation flag that is handed to kmalloc by the API call. - * - * Allocate the request data structure that must be used with the ablkcipher - * encrypt and decrypt API calls. During the allocation, the provided ablkcipher - * handle is registered in the request data structure. - * - * Return: allocated request handle in case of success, or NULL if out of memory - */ -static inline struct ablkcipher_request *ablkcipher_request_alloc( - struct crypto_ablkcipher *tfm, gfp_t gfp) -{ - struct ablkcipher_request *req; - - req = kmalloc(sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(tfm), gfp); - - if (likely(req)) - ablkcipher_request_set_tfm(req, tfm); - - return req; -} - -/** - * ablkcipher_request_free() - zeroize and free request data structure - * @req: request data structure cipher handle to be freed - */ -static inline void ablkcipher_request_free(struct ablkcipher_request *req) -{ - kzfree(req); -} - -/** - * ablkcipher_request_set_callback() - set asynchronous callback function - * @req: request handle - * @flags: specify zero or an ORing of the flags - * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and - * increase the wait queue beyond the initial maximum size; - * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep - * @compl: callback function pointer to be registered with the request handle - * @data: The data pointer refers to memory that is not used by the kernel - * crypto API, but provided to the callback function for it to use. Here, - * the caller can provide a reference to memory the callback function can - * operate on. As the callback function is invoked asynchronously to the - * related functionality, it may need to access data structures of the - * related functionality which can be referenced using this pointer. The - * callback function can access the memory via the "data" field in the - * crypto_async_request data structure provided to the callback function. - * - * This function allows setting the callback function that is triggered once the - * cipher operation completes. - * - * The callback function is registered with the ablkcipher_request handle and - * must comply with the following template:: - * - * void callback_function(struct crypto_async_request *req, int error) - */ -static inline void ablkcipher_request_set_callback( - struct ablkcipher_request *req, - u32 flags, crypto_completion_t compl, void *data) -{ - req->base.complete = compl; - req->base.data = data; - req->base.flags = flags; -} - -/** - * ablkcipher_request_set_crypt() - set data buffers - * @req: request handle - * @src: source scatter / gather list - * @dst: destination scatter / gather list - * @nbytes: number of bytes to process from @src - * @iv: IV for the cipher operation which must comply with the IV size defined - * by crypto_ablkcipher_ivsize - * - * This function allows setting of the source data and destination data - * scatter / gather lists. - * - * For encryption, the source is treated as the plaintext and the - * destination is the ciphertext. For a decryption operation, the use is - * reversed - the source is the ciphertext and the destination is the plaintext. - */ -static inline void ablkcipher_request_set_crypt( - struct ablkcipher_request *req, - struct scatterlist *src, struct scatterlist *dst, - unsigned int nbytes, void *iv) -{ - req->src = src; - req->dst = dst; - req->nbytes = nbytes; - req->info = iv; -} - /** * DOC: Single Block Cipher API * -- cgit v1.2.3 From 9e8d42a0f7eb9056f8bdb241b91738b5a2923f4c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 8 Nov 2019 18:35:53 +0100 Subject: percpu-refcount: Use normal instead of RCU-sched" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a revert of commit a4244454df129 ("percpu-refcount: use RCU-sched insted of normal RCU") which claims the only reason for using RCU-sched is "rcu_read_[un]lock() … are slightly more expensive than preempt_disable/enable()" and "As the RCU critical sections are extremely short, using sched-RCU shouldn't have any latency implications." The problem with using RCU-sched here is that it disables preemption and the release callback (called from percpu_ref_put_many()) must not acquire any sleeping locks like spinlock_t. This breaks PREEMPT_RT because some of the users acquire spinlock_t locks in their callbacks. Using rcu_read_lock() on PREEMPTION=n kernels is not any different compared to rcu_read_lock_sched(). On PREEMPTION=y kernels there are already performance issues due to additional preemption points. Looking at the code, the rcu_read_lock() is just an increment and unlock is almost just a decrement unless there is something special to do. Both are functions while disabling preemption is inlined. Doing a small benchmark, the minimal amount of time required was mostly the same. The average time required was higher due to the higher MAX value (which could be preemption). With DEBUG_PREEMPT=y it is rcu_read_lock_sched() that takes a little longer due to the additional debug code. Convert back to normal RCU. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 7aef0abc194a..390031e816dc 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -186,14 +186,14 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; - rcu_read_lock_sched(); + rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_add(*percpu_count, nr); else atomic_long_add(nr, &ref->count); - rcu_read_unlock_sched(); + rcu_read_unlock(); } /** @@ -223,7 +223,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) unsigned long __percpu *percpu_count; bool ret; - rcu_read_lock_sched(); + rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); @@ -232,7 +232,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) ret = atomic_long_inc_not_zero(&ref->count); } - rcu_read_unlock_sched(); + rcu_read_unlock(); return ret; } @@ -257,7 +257,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) unsigned long __percpu *percpu_count; bool ret = false; - rcu_read_lock_sched(); + rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) { this_cpu_inc(*percpu_count); @@ -266,7 +266,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) ret = atomic_long_inc_not_zero(&ref->count); } - rcu_read_unlock_sched(); + rcu_read_unlock(); return ret; } @@ -285,14 +285,14 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) { unsigned long __percpu *percpu_count; - rcu_read_lock_sched(); + rcu_read_lock(); if (__ref_is_percpu(ref, &percpu_count)) this_cpu_sub(*percpu_count, nr); else if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) ref->release(ref); - rcu_read_unlock_sched(); + rcu_read_unlock(); } /** -- cgit v1.2.3 From adbb68293fc5950a46e3e22f9dc9c619661194ae Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Nov 2019 17:00:24 -0800 Subject: libnvdimm: Move nd_device_attribute_group to device_type A 'struct device_type' instance can carry default attributes for the device. Use this facility to remove the export of nd_device_attribute_group and put the responsibility on the core rather than leaf implementations to define this attribute. For regions this creates a new nd_region_attribute_groups[] added to the per-region device-type instances. Cc: Ira Weiny Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Vishal Verma Cc: Aneesh Kumar K.V Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/157309901138.1582359.12909354140826530394.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index b6eddf912568..d7dbf42498af 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -67,7 +67,6 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; -extern struct attribute_group nd_device_attribute_group; extern struct attribute_group nd_numa_attribute_group; extern struct attribute_group nd_region_attribute_group; extern struct attribute_group nd_mapping_attribute_group; -- cgit v1.2.3 From f3d7c2292d104519195fdb11192daec13229c219 Mon Sep 17 00:00:00 2001 From: Bradley Bolen Date: Sat, 16 Nov 2019 20:00:45 -0500 Subject: mmc: core: Fix size overflow for mmc partitions With large eMMC cards, it is possible to create general purpose partitions that are bigger than 4GB. The size member of the mmc_part struct is only an unsigned int which overflows for gp partitions larger than 4GB. Change this to a u64 to handle the overflow. Signed-off-by: Bradley Bolen Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index e459b38ef33c..cf3780a6ccc4 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -226,7 +226,7 @@ struct mmc_queue_req; * MMC Physical partitions */ struct mmc_part { - unsigned int size; /* partition size (in bytes) */ + u64 size; /* partition size (in bytes) */ unsigned int part_cfg; /* partition type */ char name[MAX_MMC_PART_NAME_LEN]; bool force_ro; /* to make boot parts RO by default */ -- cgit v1.2.3 From 1e0bd5a091e5d9e0f1d5b0e6329b87bb1792f784 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 17 Nov 2019 09:28:02 -0800 Subject: bpf: Switch bpf_map ref counter to atomic64_t so bpf_map_inc() never fails 92117d8443bc ("bpf: fix refcnt overflow") turned refcounting of bpf_map into potentially failing operation, when refcount reaches BPF_MAX_REFCNT limit (32k). Due to using 32-bit counter, it's possible in practice to overflow refcounter and make it wrap around to 0, causing erroneous map free, while there are still references to it, causing use-after-free problems. But having a failing refcounting operations are problematic in some cases. One example is mmap() interface. After establishing initial memory-mapping, user is allowed to arbitrarily map/remap/unmap parts of mapped memory, arbitrarily splitting it into multiple non-contiguous regions. All this happening without any control from the users of mmap subsystem. Rather mmap subsystem sends notifications to original creator of memory mapping through open/close callbacks, which are optionally specified during initial memory mapping creation. These callbacks are used to maintain accurate refcount for bpf_map (see next patch in this series). The problem is that open() callback is not supposed to fail, because memory-mapped resource is set up and properly referenced. This is posing a problem for using memory-mapping with BPF maps. One solution to this is to maintain separate refcount for just memory-mappings and do single bpf_map_inc/bpf_map_put when it goes from/to zero, respectively. There are similar use cases in current work on tcp-bpf, necessitating extra counter as well. This seems like a rather unfortunate and ugly solution that doesn't scale well to various new use cases. Another approach to solve this is to use non-failing refcount_t type, which uses 32-bit counter internally, but, once reaching overflow state at UINT_MAX, stays there. This utlimately causes memory leak, but prevents use after free. But given refcounting is not the most performance-critical operation with BPF maps (it's not used from running BPF program code), we can also just switch to 64-bit counter that can't overflow in practice, potentially disadvantaging 32-bit platforms a tiny bit. This simplifies semantics and allows above described scenarios to not worry about failing refcount increment operation. In terms of struct bpf_map size, we are still good and use the same amount of space: BEFORE (3 cache lines, 8 bytes of padding at the end): struct bpf_map { const struct bpf_map_ops * ops __attribute__((__aligned__(64))); /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ int spin_lock_off; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ u32 btf_key_type_id; /* 56 4 */ u32 btf_value_type_id; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct btf * btf; /* 64 8 */ struct bpf_map_memory memory; /* 72 16 */ bool unpriv_array; /* 88 1 */ bool frozen; /* 89 1 */ /* XXX 38 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ atomic_t refcnt __attribute__((__aligned__(64))); /* 128 4 */ atomic_t usercnt; /* 132 4 */ struct work_struct work; /* 136 32 */ char name[16]; /* 168 16 */ /* size: 192, cachelines: 3, members: 21 */ /* sum members: 146, holes: 1, sum holes: 38 */ /* padding: 8 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 38 */ } __attribute__((__aligned__(64))); AFTER (same 3 cache lines, no extra padding now): struct bpf_map { const struct bpf_map_ops * ops __attribute__((__aligned__(64))); /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ int spin_lock_off; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ u32 btf_key_type_id; /* 56 4 */ u32 btf_value_type_id; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct btf * btf; /* 64 8 */ struct bpf_map_memory memory; /* 72 16 */ bool unpriv_array; /* 88 1 */ bool frozen; /* 89 1 */ /* XXX 38 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ atomic64_t refcnt __attribute__((__aligned__(64))); /* 128 8 */ atomic64_t usercnt; /* 136 8 */ struct work_struct work; /* 144 32 */ char name[16]; /* 176 16 */ /* size: 192, cachelines: 3, members: 21 */ /* sum members: 154, holes: 1, sum holes: 38 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 38 */ } __attribute__((__aligned__(64))); This patch, while modifying all users of bpf_map_inc, also cleans up its interface to match bpf_map_put with separate operations for bpf_map_inc and bpf_map_inc_with_uref (to match bpf_map_put and bpf_map_put_with_uref, respectively). Also, given there are no users of bpf_map_inc_not_zero specifying uref=true, remove uref flag and default to uref=false internally. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20191117172806.2195367-2-andriin@fb.com --- include/linux/bpf.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5b81cde47314..34a34445c009 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -103,8 +103,8 @@ struct bpf_map { /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ - atomic_t refcnt ____cacheline_aligned; - atomic_t usercnt; + atomic64_t refcnt ____cacheline_aligned; + atomic64_t usercnt; struct work_struct work; char name[BPF_OBJ_NAME_LEN]; }; @@ -783,9 +783,9 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); -struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map, - bool uref); +void bpf_map_inc(struct bpf_map *map); +void bpf_map_inc_with_uref(struct bpf_map *map); +struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); -- cgit v1.2.3 From 85192dbf4de08795afe2b88e52a36fc6abfc3dba Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 17 Nov 2019 09:28:03 -0800 Subject: bpf: Convert bpf_prog refcnt to atomic64_t Similarly to bpf_map's refcnt/usercnt, convert bpf_prog's refcnt to atomic64 and remove artificial 32k limit. This allows to make bpf_prog's refcounting non-failing, simplifying logic of users of bpf_prog_add/bpf_prog_inc. Validated compilation by running allyesconfig kernel build. Suggested-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191117172806.2195367-3-andriin@fb.com --- include/linux/bpf.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 34a34445c009..fb606dc61a3a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -485,7 +485,7 @@ struct bpf_func_info_aux { }; struct bpf_prog_aux { - atomic_t refcnt; + atomic64_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; @@ -770,9 +770,9 @@ extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv); -struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); +void bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); -struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); +void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); int __bpf_prog_charge(struct user_struct *user, u32 pages); @@ -912,10 +912,8 @@ static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, return ERR_PTR(-EOPNOTSUPP); } -static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, - int i) +static inline void bpf_prog_add(struct bpf_prog *prog, int i) { - return ERR_PTR(-EOPNOTSUPP); } static inline void bpf_prog_sub(struct bpf_prog *prog, int i) @@ -926,9 +924,8 @@ static inline void bpf_prog_put(struct bpf_prog *prog) { } -static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog) +static inline void bpf_prog_inc(struct bpf_prog *prog) { - return ERR_PTR(-EOPNOTSUPP); } static inline struct bpf_prog *__must_check -- cgit v1.2.3 From fc9702273e2edb90400a34b3be76f7b08fa3344b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 17 Nov 2019 09:28:04 -0800 Subject: bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY Add ability to memory-map contents of BPF array map. This is extremely useful for working with BPF global data from userspace programs. It allows to avoid typical bpf_map_{lookup,update}_elem operations, improving both performance and usability. There had to be special considerations for map freezing, to avoid having writable memory view into a frozen map. To solve this issue, map freezing and mmap-ing is happening under mutex now: - if map is already frozen, no writable mapping is allowed; - if map has writable memory mappings active (accounted in map->writecnt), map freezing will keep failing with -EBUSY; - once number of writable memory mappings drops to zero, map freezing can be performed again. Only non-per-CPU plain arrays are supported right now. Maps with spinlocks can't be memory mapped either. For BPF_F_MMAPABLE array, memory allocation has to be done through vmalloc() to be mmap()'able. We also need to make sure that array data memory is page-sized and page-aligned, so we over-allocate memory in such a way that struct bpf_array is at the end of a single page of memory with array->value being aligned with the start of the second page. On deallocation we need to accomodate this memory arrangement to free vmalloc()'ed memory correctly. One important consideration regarding how memory-mapping subsystem functions. Memory-mapping subsystem provides few optional callbacks, among them open() and close(). close() is called for each memory region that is unmapped, so that users can decrease their reference counters and free up resources, if necessary. open() is *almost* symmetrical: it's called for each memory region that is being mapped, **except** the very first one. So bpf_map_mmap does initial refcnt bump, while open() will do any extra ones after that. Thus number of close() calls is equal to number of open() calls plus one more. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: John Fastabend Acked-by: Johannes Weiner Link: https://lore.kernel.org/bpf/20191117172806.2195367-4-andriin@fb.com --- include/linux/bpf.h | 11 ++++++++--- include/linux/vmalloc.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fb606dc61a3a..e913dd5946ae 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -68,6 +69,7 @@ struct bpf_map_ops { u64 *imm, u32 off); int (*map_direct_value_meta)(const struct bpf_map *map, u64 imm, u32 *off); + int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); }; struct bpf_map_memory { @@ -96,9 +98,10 @@ struct bpf_map { u32 btf_value_type_id; struct btf *btf; struct bpf_map_memory memory; + char name[BPF_OBJ_NAME_LEN]; bool unpriv_array; - bool frozen; /* write-once */ - /* 48 bytes hole */ + bool frozen; /* write-once; write-protected by freeze_mutex */ + /* 22 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@ -106,7 +109,8 @@ struct bpf_map { atomic64_t refcnt ____cacheline_aligned; atomic64_t usercnt; struct work_struct work; - char name[BPF_OBJ_NAME_LEN]; + struct mutex freeze_mutex; + u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -795,6 +799,7 @@ void bpf_map_charge_finish(struct bpf_map_memory *mem); void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src); void *bpf_map_area_alloc(size_t size, int numa_node); +void *bpf_map_area_mmapable_alloc(size_t size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4e7809408073..b4c58a191eb1 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -93,6 +93,7 @@ extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); extern void *vzalloc_node(unsigned long size, int node); +extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); -- cgit v1.2.3 From 496074f94b19574c77240d3b3f84cfb1097de51d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Nov 2019 14:31:28 -0800 Subject: blk-cgroup: cgroup_rstat_updated() shouldn't be called on cgroup1 Currently, cgroup rstat is supported only on cgroup2 hierarchy and rstat functions shouldn't be called on cgroup1 cgroups. While converting blk-cgroup core statistics to rstat, f73316482977 ("blk-cgroup: reimplement basic IO stats using cgroup rstat") accidentally ended up calling cgroup_rstat_updated() on cgroup1 cgroups causing crashes. Longer term, we probably should add cgroup1 support to rstat but for now let's mask the call directly. Fixes: f73316482977 ("blk-cgroup: reimplement basic IO stats using cgroup rstat") Tested-by: Faiz Abbas Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 48a66738143d..19394c77ed99 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -626,7 +626,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, bis->cur.ios[rwd]++; u64_stats_update_end(&bis->sync); - cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu); + if (cgroup_subsys_on_dfl(io_cgrp_subsys)) + cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu); put_cpu(); } -- cgit v1.2.3 From ea806eb3eab35528b578a061b2c4b28f0f92c465 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 17 Nov 2019 17:04:15 -0500 Subject: ftrace: Add a helper function to modify_ftrace_direct() to allow arch optimization If a direct ftrace callback is at a location that does not have any other ftrace helpers attached to it, it is possible to simply just change the text to call the new caller (if the architecture supports it). But this requires special architecture code. Currently, modify_ftrace_direct() uses a trick to add a stub ftrace callback to the location forcing it to call the ftrace iterator. Then it can change the direct helper to call the new function in C, and then remove the stub. Removing the stub will have the location now call the new location that the direct helper is using. The new helper function does the registering the stub trick, but is a weak function, allowing an architecture to override it to do something a bit more direct. Link: https://lore.kernel.org/r/20191115215125.mbqv7taqnx376yed@ast-mbp.dhcp.thefacebook.com Suggested-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 73eb2e93593f..dfaa37e1943d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -246,12 +246,24 @@ static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* CONFIG_FUNCTION_TRACER */ +struct ftrace_func_entry { + struct hlist_node hlist; + unsigned long ip; + unsigned long direct; /* for direct lookup only */ +}; + +struct dyn_ftrace; + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); int unregister_ftrace_direct(unsigned long ip, unsigned long addr); int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr); struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr); +int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, + struct dyn_ftrace *rec, + unsigned long old_addr, + unsigned long new_addr); #else # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) @@ -271,6 +283,13 @@ static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long a { return NULL; } +static inline int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, + struct dyn_ftrace *rec, + unsigned long old_addr, + unsigned long new_addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS @@ -343,8 +362,6 @@ static inline void stack_tracer_enable(void) { } int ftrace_arch_code_modify_prepare(void); int ftrace_arch_code_modify_post_process(void); -struct dyn_ftrace; - enum ftrace_bug_type { FTRACE_BUG_UNKNOWN, FTRACE_BUG_INIT, -- cgit v1.2.3 From ae7c2d342a10dbef1e054482f46498b6282a1df0 Mon Sep 17 00:00:00 2001 From: Luhua Xu Date: Mon, 18 Nov 2019 12:57:16 +0800 Subject: spi: mediatek: add SPI_CS_HIGH support Change to use SPI_CS_HIGH to support spi CS polarity setting for chips support enhance_timing. Signed-off-by: Luhua Xu Link: https://lore.kernel.org/r/1574053037-26721-2-git-send-email-luhua.xu@mediatek.com Signed-off-by: Mark Brown --- include/linux/platform_data/spi-mt65xx.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/spi-mt65xx.h b/include/linux/platform_data/spi-mt65xx.h index f0e6d6483e62..65fd5ffd257c 100644 --- a/include/linux/platform_data/spi-mt65xx.h +++ b/include/linux/platform_data/spi-mt65xx.h @@ -11,7 +11,6 @@ /* Board specific platform_data */ struct mtk_chip_config { - u32 cs_pol; u32 sample_sel; }; #endif -- cgit v1.2.3 From 5ca470a0c3886b80ec13c3a19e9aae4c2f469202 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 4 Oct 2019 10:39:55 -0600 Subject: docs: Add request_irq() documentation While checking the results of the :c:func: removal, I noticed that there was no documentation for request_irq(), and request_threaded_irq() was not mentioned at all. Add a kerneldoc comment for request_irq() and add request_threaded_irq() to the list of functions. Reviewed-by: Thomas Gleixner Signed-off-by: Jonathan Corbet --- include/linux/interrupt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 89fc59dab57d..ba873ec7e09d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -140,6 +140,19 @@ request_threaded_irq(unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, unsigned long flags, const char *name, void *dev); +/** + * request_irq - Add a handler for an interrupt line + * @irq: The interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Primary handler for threaded interrupts + * If NULL, the default primary handler is installed + * @flags: Handling flags + * @name: Name of the device generating this interrupt + * @dev: A cookie passed to the handler function + * + * This call allocates an interrupt and establishes a handler; see + * the documentation for request_threaded_irq() for details. + */ static inline int __must_check request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev) -- cgit v1.2.3 From 298e54fa810e027f1b0800d789eb862592721f08 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 15 Nov 2019 19:56:51 +0000 Subject: net: phy: add core phylib sfp support Add core phylib help for supporting SFP sockets on PHYs. This provides a mechanism to inform the SFP layer about PHY up/down events, and also unregister the SFP bus when the PHY is going away. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 78436d58ce7c..124516fe2763 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -203,6 +203,8 @@ static inline const char *phy_modes(phy_interface_t interface) struct device; struct phylink; +struct sfp_bus; +struct sfp_upstream_ops; struct sk_buff; /* @@ -342,6 +344,8 @@ struct phy_c45_device_ids { * dev_flags: Device-specific flags used by the PHY driver. * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine + * sfp_bus_attached: flag indicating whether the SFP bus has been attached + * sfp_bus: SFP bus attached to this PHY's fiber port * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. @@ -432,6 +436,9 @@ struct phy_device { struct mutex lock; + /* This may be modified under the rtnl lock */ + bool sfp_bus_attached; + struct sfp_bus *sfp_bus; struct phylink *phylink; struct net_device *attached_dev; @@ -1020,6 +1027,10 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); +void phy_sfp_attach(void *upstream, struct sfp_bus *bus); +void phy_sfp_detach(void *upstream, struct sfp_bus *bus); +int phy_sfp_probe(struct phy_device *phydev, + const struct sfp_upstream_ops *ops); struct phy_device *phy_attach(struct net_device *dev, const char *bus_id, phy_interface_t interface); struct phy_device *phy_find_first(struct mii_bus *bus); -- cgit v1.2.3 From cbda56d5fefcebc01448982a55836c88a825b34c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 18 Nov 2019 12:11:24 +0100 Subject: cpuidle: Introduce cpuidle_driver_state_disabled() for driver quirks Commit 99e98d3fb100 ("cpuidle: Consolidate disabled state checks") overlooked the fact that the imx6q and tegra20 cpuidle drivers use the "disabled" field in struct cpuidle_state for quirks which trigger after the initialization of cpuidle, so reading the initial value of that field is not sufficient for those drivers. In order to allow them to implement the quirks without using the "disabled" field in struct cpuidle_state, introduce a new helper function and modify them to use it. Fixes: 99e98d3fb100 ("cpuidle: Consolidate disabled state checks") Reported-by: Len Brown Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 22602747f468..afb6a573b46d 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -149,6 +149,8 @@ extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); extern struct cpuidle_driver *cpuidle_driver_ref(void); extern void cpuidle_driver_unref(void); +extern void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx, + bool disable); extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); extern int cpuidle_register_device(struct cpuidle_device *dev); extern void cpuidle_unregister_device(struct cpuidle_device *dev); @@ -186,6 +188,8 @@ static inline int cpuidle_register_driver(struct cpuidle_driver *drv) static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } static inline struct cpuidle_driver *cpuidle_driver_ref(void) {return NULL; } static inline void cpuidle_driver_unref(void) {} +static inline void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, + int idx, bool disable) { } static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } static inline int cpuidle_register_device(struct cpuidle_device *dev) {return -ENODEV; } -- cgit v1.2.3 From 3c4d77b68928df6c2bf07f4c3ba8e5d5e490bf4e Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Thu, 24 Oct 2019 16:28:05 -0600 Subject: platform/chrome: wilco_ec: Add charging config driver Add a device to control the charging algorithm used on Wilco devices, which will be picked up by the drivers/power/supply/wilco-charger.c driver. See Documentation/ABI/testing/sysfs-class-power-wilco for the userspace interface and other info. Signed-off-by: Nick Crews Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/wilco-ec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index ad03b586a095..0d104e780632 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -29,6 +29,7 @@ * @data_size: Size of the data buffer used for EC communication. * @debugfs_pdev: The child platform_device used by the debugfs sub-driver. * @rtc_pdev: The child platform_device used by the RTC sub-driver. + * @charger_pdev: Child platform_device used by the charger config sub-driver. * @telem_pdev: The child platform_device used by the telemetry sub-driver. */ struct wilco_ec_device { @@ -41,6 +42,7 @@ struct wilco_ec_device { size_t data_size; struct platform_device *debugfs_pdev; struct platform_device *rtc_pdev; + struct platform_device *charger_pdev; struct platform_device *telem_pdev; }; -- cgit v1.2.3 From 119a3cb6d687259f2be333351c1c5d634204e68b Mon Sep 17 00:00:00 2001 From: Daniel Campello Date: Wed, 6 Nov 2019 09:33:19 -0700 Subject: platform/chrome: wilco_ec: Add keyboard backlight LED support The EC is in charge of controlling the keyboard backlight on the Wilco platform. We expose a standard LED class device named platform::kbd_backlight. Since the EC will never change the backlight level of its own accord, we don't need to implement a brightness_get() method. Signed-off-by: Nick Crews Signed-off-by: Daniel Campello Reviewed-by: Daniel Campello Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/wilco-ec.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h index 0d104e780632..afede15a95bf 100644 --- a/include/linux/platform_data/wilco-ec.h +++ b/include/linux/platform_data/wilco-ec.h @@ -122,6 +122,19 @@ struct wilco_ec_message { */ int wilco_ec_mailbox(struct wilco_ec_device *ec, struct wilco_ec_message *msg); +/** + * wilco_keyboard_leds_init() - Set up the keyboard backlight LEDs. + * @ec: EC device to query. + * + * After this call, the keyboard backlight will be exposed through a an LED + * device at /sys/class/leds. + * + * This may sleep because it uses wilco_ec_mailbox(). + * + * Return: 0 on success, negative error code on failure. + */ +int wilco_keyboard_leds_init(struct wilco_ec_device *ec); + /* * A Property is typically a data item that is stored to NVRAM * by the EC. Each of these data items has an index associated -- cgit v1.2.3 From e2f6a0e34870ff1bdb1411e250dd2f03908cfa9f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 19 Nov 2019 09:51:54 -0800 Subject: libnvdimm: Move nd_numa_attribute_group to device_type A 'struct device_type' instance can carry default attributes for the device. Use this facility to remove the export of nd_numa_attribute_group and put the responsibility on the core rather than leaf implementations to define this attribute. Cc: Ira Weiny Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Vishal Verma Cc: Aneesh Kumar K.V Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/157401269537.43284.14411189404186877352.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index d7dbf42498af..e9a4e25fc708 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -67,7 +67,6 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; -extern struct attribute_group nd_numa_attribute_group; extern struct attribute_group nd_region_attribute_group; extern struct attribute_group nd_mapping_attribute_group; -- cgit v1.2.3 From 7c4fc8cde1641e3213eb1dafc6854331e9e0828c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Nov 2019 17:07:16 -0800 Subject: libnvdimm: Move nd_region_attribute_group to device_type A 'struct device_type' instance can carry default attributes for the device. Use this facility to remove the export of nd_region_attribute_group and put the responsibility on the core rather than leaf implementations to define this attribute. Cc: Ira Weiny Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Vishal Verma Cc: Aneesh Kumar K.V Signed-off-by: Dan Williams Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/157309902169.1582359.16828508538444551337.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/libnvdimm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index e9a4e25fc708..312248d334c7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -67,7 +67,6 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; -extern struct attribute_group nd_region_attribute_group; extern struct attribute_group nd_mapping_attribute_group; struct nvdimm; -- cgit v1.2.3 From 4ce79fa97e6a54ee028063381346dc2fea91a76b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Nov 2019 17:07:39 -0800 Subject: libnvdimm: Move nd_mapping_attribute_group to device_type A 'struct device_type' instance can carry default attributes for the device. Use this facility to remove the export of nd_mapping_attribute_group and put the responsibility on the core rather than leaf implementations to define this attribute. Cc: Ira Weiny Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Vishal Verma Cc: Aneesh Kumar K.V Signed-off-by: Dan Williams Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/157309902686.1582359.6749533709859492704.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/libnvdimm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 312248d334c7..eb597d1cb891 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -67,7 +67,6 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; -extern struct attribute_group nd_mapping_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; -- cgit v1.2.3 From 360eba7ebdf716194ed2ede1ebc3ce0f9790a91c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Nov 2019 17:08:04 -0800 Subject: libnvdimm: Move nvdimm_attribute_group to device_type A 'struct device_type' instance can carry default attributes for the device. Use this facility to remove the export of nvdimm_attribute_group and put the responsibility on the core rather than leaf implementations to define this attribute. Cc: Ira Weiny Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Vishal Verma Cc: Aneesh Kumar K.V Signed-off-by: Dan Williams Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/157309903201.1582359.10966209746585062329.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/libnvdimm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index eb597d1cb891..3644af97bcb4 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -66,7 +66,6 @@ enum { }; extern struct attribute_group nvdimm_bus_attribute_group; -extern struct attribute_group nvdimm_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; -- cgit v1.2.3 From e755799aefa9385469bec49b2c2ccf1aaa33829a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 12 Nov 2019 17:08:56 -0800 Subject: libnvdimm: Move nvdimm_bus_attribute_group to device_type A 'struct device_type' instance can carry default attributes for the device. Use this facility to remove the export of nvdimm_bus_attribute_group and put the responsibility on the core rather than leaf implementations to define this attribute. Cc: Ira Weiny Cc: Michael Ellerman Cc: "Oliver O'Halloran" Cc: Vishal Verma Cc: Aneesh Kumar K.V Signed-off-by: Dan Williams Reviewed-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/157309903815.1582359.6418211876315050283.stgit@dwillia2-desk3.amr.corp.intel.com --- include/linux/libnvdimm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 3644af97bcb4..9df091bd30ba 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -65,8 +65,6 @@ enum { DPA_RESOURCE_ADJUSTED = 1 << 0, }; -extern struct attribute_group nvdimm_bus_attribute_group; - struct nvdimm; struct nvdimm_bus_descriptor; typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, -- cgit v1.2.3 From ba31c1a48538992316cc71ce94fa9cd3e7b427c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:36 +0100 Subject: futex: Move futex exit handling into futex code The futex exit handling is #ifdeffed into mm_release() which is not pretty to begin with. But upcoming changes to address futex exit races need to add more functionality to this exit code. Split it out into a function, move it into futex code and make the various futex exit functions static. Preparatory only and no functional change. Folded build fix from Borislav. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.049705556@linutronix.de --- include/linux/compat.h | 2 -- include/linux/futex.h | 29 ++++++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 16dafd9f4b86..c4c389c7e1b4 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -410,8 +410,6 @@ struct compat_kexec_segment; struct compat_mq_attr; struct compat_msgbuf; -extern void compat_exit_robust_list(struct task_struct *curr); - #define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t)) #define BITS_TO_COMPAT_LONGS(bits) DIV_ROUND_UP(bits, BITS_PER_COMPAT_LONG) diff --git a/include/linux/futex.h b/include/linux/futex.h index ccaef0097785..d6ed11c51a8e 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include #include + #include struct inode; @@ -48,15 +50,24 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #ifdef CONFIG_FUTEX -extern void exit_robust_list(struct task_struct *curr); -long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - u32 __user *uaddr2, u32 val2, u32 val3); -#else -static inline void exit_robust_list(struct task_struct *curr) +static inline void futex_init_task(struct task_struct *tsk) { + tsk->robust_list = NULL; +#ifdef CONFIG_COMPAT + tsk->compat_robust_list = NULL; +#endif + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; } +void futex_mm_release(struct task_struct *tsk); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); +#else +static inline void futex_init_task(struct task_struct *tsk) { } +static inline void futex_mm_release(struct task_struct *tsk) { } static inline long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) @@ -65,12 +76,4 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, } #endif -#ifdef CONFIG_FUTEX_PI -extern void exit_pi_state_list(struct task_struct *curr); -#else -static inline void exit_pi_state_list(struct task_struct *curr) -{ -} -#endif - #endif -- cgit v1.2.3 From 3d4775df0a89240f671861c6ab6e8d59af8e9e41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:37 +0100 Subject: futex: Replace PF_EXITPIDONE with a state The futex exit handling relies on PF_ flags. That's suboptimal as it requires a smp_mb() and an ugly lock/unlock of the exiting tasks pi_lock in the middle of do_exit() to enforce the observability of PF_EXITING in the futex code. Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic over to the new state. The PF_EXITING dependency will be cleaned up in a later step. This prepares for handling various futex exit issues later. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de --- include/linux/futex.h | 33 +++++++++++++++++++++++++++++++++ include/linux/sched.h | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index d6ed11c51a8e..025ad96bcf9d 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -50,6 +50,10 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #ifdef CONFIG_FUTEX +enum { + FUTEX_STATE_OK, + FUTEX_STATE_DEAD, +}; static inline void futex_init_task(struct task_struct *tsk) { @@ -59,6 +63,34 @@ static inline void futex_init_task(struct task_struct *tsk) #endif INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; + tsk->futex_state = FUTEX_STATE_OK; +} + +/** + * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD + * @tsk: task to set the state on + * + * Set the futex exit state of the task lockless. The futex waiter code + * observes that state when a task is exiting and loops until the task has + * actually finished the futex cleanup. The worst case for this is that the + * waiter runs through the wait loop until the state becomes visible. + * + * This has two callers: + * + * - futex_mm_release() after the futex exit cleanup has been done + * + * - do_exit() from the recursive fault handling path. + * + * In case of a recursive fault this is best effort. Either the futex exit + * code has run already or not. If the OWNER_DIED bit has been set on the + * futex then the waiter can take it over. If not, the problem is pushed + * back to user space. If the futex exit code did not run yet, then an + * already queued waiter might block forever, but there is nothing which + * can be done about that. + */ +static inline void futex_exit_done(struct task_struct *tsk) +{ + tsk->futex_state = FUTEX_STATE_DEAD; } void futex_mm_release(struct task_struct *tsk); @@ -68,6 +100,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, #else static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_mm_release(struct task_struct *tsk) { } +static inline void futex_exit_done(struct task_struct *tsk) { } static inline long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c2e56bd8913..85dab2f721c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1053,6 +1053,7 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; @@ -1441,7 +1442,6 @@ extern struct pid *cad_pid; */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ -- cgit v1.2.3 From 4610ba7ad877fafc0a25a30c6c82015304120426 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:38 +0100 Subject: exit/exec: Seperate mm_release() mm_release() contains the futex exit handling. mm_release() is called from do_exit()->exit_mm() and from exec()->exec_mm(). In the exit_mm() case PF_EXITING and the futex state is updated. In the exec_mm() case these states are not touched. As the futex exit code needs further protections against exit races, this needs to be split into two functions. Preparatory only, no functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.240518241@linutronix.de --- include/linux/sched/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e6770012db18..c49257a3b510 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -117,8 +117,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exit() */ +extern void exit_mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exec() */ +extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); -- cgit v1.2.3 From 150d71584b12809144b8145b817e83b81158ae5f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:39 +0100 Subject: futex: Split futex_mm_release() for exit/exec To allow separate handling of the futex exit state in the futex exit code for exit and exec, split futex_mm_release() into two functions and invoke them from the corresponding exit/exec_mm_release() callsites. Preparatory only, no functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.332094221@linutronix.de --- include/linux/futex.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 025ad96bcf9d..6414cfaf88e0 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -93,14 +93,16 @@ static inline void futex_exit_done(struct task_struct *tsk) tsk->futex_state = FUTEX_STATE_DEAD; } -void futex_mm_release(struct task_struct *tsk); +void futex_exit_release(struct task_struct *tsk); +void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); #else static inline void futex_init_task(struct task_struct *tsk) { } -static inline void futex_mm_release(struct task_struct *tsk) { } static inline void futex_exit_done(struct task_struct *tsk) { } +static inline void futex_exit_release(struct task_struct *tsk) { } +static inline void futex_exec_release(struct task_struct *tsk) { } static inline long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) -- cgit v1.2.3 From 18f694385c4fd77a09851fd301236746ca83f3cb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:41 +0100 Subject: futex: Mark the begin of futex exit explicitly Instead of relying on PF_EXITING use an explicit state for the futex exit and set it in the futex exit function. This moves the smp barrier and the lock/unlock serialization into the futex code. As with the DEAD state this is restricted to the exit path as exec continues to use the same task struct. This allows to simplify that logic in a next step. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.539409004@linutronix.de --- include/linux/futex.h | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 6414cfaf88e0..9f2792427d64 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -52,6 +52,7 @@ union futex_key { #ifdef CONFIG_FUTEX enum { FUTEX_STATE_OK, + FUTEX_STATE_EXITING, FUTEX_STATE_DEAD, }; @@ -66,33 +67,7 @@ static inline void futex_init_task(struct task_struct *tsk) tsk->futex_state = FUTEX_STATE_OK; } -/** - * futex_exit_done - Sets the tasks futex state to FUTEX_STATE_DEAD - * @tsk: task to set the state on - * - * Set the futex exit state of the task lockless. The futex waiter code - * observes that state when a task is exiting and loops until the task has - * actually finished the futex cleanup. The worst case for this is that the - * waiter runs through the wait loop until the state becomes visible. - * - * This has two callers: - * - * - futex_mm_release() after the futex exit cleanup has been done - * - * - do_exit() from the recursive fault handling path. - * - * In case of a recursive fault this is best effort. Either the futex exit - * code has run already or not. If the OWNER_DIED bit has been set on the - * futex then the waiter can take it over. If not, the problem is pushed - * back to user space. If the futex exit code did not run yet, then an - * already queued waiter might block forever, but there is nothing which - * can be done about that. - */ -static inline void futex_exit_done(struct task_struct *tsk) -{ - tsk->futex_state = FUTEX_STATE_DEAD; -} - +void futex_exit_recursive(struct task_struct *tsk); void futex_exit_release(struct task_struct *tsk); void futex_exec_release(struct task_struct *tsk); @@ -100,7 +75,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); #else static inline void futex_init_task(struct task_struct *tsk) { } -static inline void futex_exit_done(struct task_struct *tsk) { } +static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } static inline void futex_exec_release(struct task_struct *tsk) { } static inline long do_futex(u32 __user *uaddr, int op, u32 val, -- cgit v1.2.3 From 3f186d974826847a07bc7964d79ec4eded475ad9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Nov 2019 22:55:44 +0100 Subject: futex: Add mutex around futex exit The mutex will be used in subsequent changes to replace the busy looping of a waiter when the futex owner is currently executing the exit cleanup to prevent a potential live lock. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20191106224556.845798895@linutronix.de --- include/linux/futex.h | 1 + include/linux/sched.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/futex.h b/include/linux/futex.h index 9f2792427d64..5cc3fed27d4c 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -65,6 +65,7 @@ static inline void futex_init_task(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache = NULL; tsk->futex_state = FUTEX_STATE_OK; + mutex_init(&tsk->futex_exit_mutex); } void futex_exit_recursive(struct task_struct *tsk); diff --git a/include/linux/sched.h b/include/linux/sched.h index 85dab2f721c9..1ebe540f8a08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1053,6 +1053,7 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + struct mutex futex_exit_mutex; unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3 From c55b51a06b01d67a99457bb82a8c31081c7faa23 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 16 Nov 2019 14:16:12 +0100 Subject: cpuidle: Allow idle injection to apply exit latency limit In some cases it may be useful to specify an exit latency limit for the idle state to be used during CPU idle time injection. Instead of duplicating the information in struct cpuidle_device or propagating the latency limit in the call stack, replace the use_deepest_state field with forced_latency_limit_ns to represent that limit, so that the deepest idle state with exit latency within that limit is forced (i.e. no governors) when it is set. A zero exit latency limit for forced idle means to use governors in the usual way (analogous to use_deepest_state equal to "false" before this change). Additionally, add play_idle_precise() taking two arguments, the duration of forced idle and the idle state exit latency limit, both in nanoseconds, and redefine play_idle() as a wrapper around that new function. This change is preparatory, no functional impact is expected. Suggested-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano [ rjw: Subject, changelog, cpuidle_use_deepest_state() kerneldoc, whitespace ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpu.h | 7 ++++++- include/linux/cpuidle.h | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d0633ebdaa9c..cc03a7848b63 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -179,7 +179,12 @@ void arch_cpu_idle_dead(void); int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); -void play_idle(unsigned long duration_us); +void play_idle_precise(u64 duration_ns, u64 latency_ns); + +static inline void play_idle(unsigned long duration_us) +{ + play_idle_precise(duration_us * NSEC_PER_USEC, U64_MAX); +} #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index afb6a573b46d..72b26ff1de4b 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -85,7 +85,6 @@ struct cpuidle_driver_kobj; struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; - unsigned int use_deepest_state:1; unsigned int poll_time_limit:1; unsigned int cpu; ktime_t next_hrtimer; @@ -93,6 +92,7 @@ struct cpuidle_device { int last_state_idx; u64 last_residency_ns; u64 poll_limit_ns; + u64 forced_idle_latency_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -216,7 +216,7 @@ extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev); -extern void cpuidle_use_deepest_state(bool enable); +extern void cpuidle_use_deepest_state(u64 latency_limit_ns); #else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) @@ -224,7 +224,7 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } -static inline void cpuidle_use_deepest_state(bool enable) +static inline void cpuidle_use_deepest_state(u64 latency_limit_ns) { } #endif -- cgit v1.2.3 From 5aa9ba6312e36c18626e73506b92d1513d815435 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sat, 16 Nov 2019 14:16:13 +0100 Subject: cpuidle: Pass exit latency limit to cpuidle_use_deepest_state() Modify cpuidle_use_deepest_state() to take an additional exit latency limit argument to be passed to find_deepest_idle_state() and make cpuidle_idle_call() pass dev->forced_idle_latency_limit_ns to it for forced idle. Suggested-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano [ rjw: Rebase and rearrange code, subject & changelog ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 72b26ff1de4b..2dbe46b7c213 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -213,13 +213,15 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #ifdef CONFIG_CPU_IDLE extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + u64 latency_limit_ns); extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern void cpuidle_use_deepest_state(u64 latency_limit_ns); #else static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, + u64 latency_limit_ns) {return -ENODEV; } static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev) -- cgit v1.2.3 From 1ed7d5c8f82d2aebabb6fe24f436a68bdd8c1065 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Wed, 20 Nov 2019 12:17:27 +0530 Subject: firmware: xilinx: Add SDIO Tap Delay nodes Add tap delay nodes for setting SDIO Tap Delays on ZynqMP platform. Signed-off-by: Manish Narani Signed-off-by: Ulf Hansson --- include/linux/firmware/xlnx-zynqmp.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 778abbbc7d94..df366f1a4cb4 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -91,7 +91,8 @@ enum pm_ret_status { }; enum pm_ioctl_id { - IOCTL_SET_PLL_FRAC_MODE = 8, + IOCTL_SET_SD_TAPDELAY = 7, + IOCTL_SET_PLL_FRAC_MODE, IOCTL_GET_PLL_FRAC_MODE, IOCTL_SET_PLL_FRAC_DATA, IOCTL_GET_PLL_FRAC_DATA, @@ -250,6 +251,16 @@ enum zynqmp_pm_request_ack { ZYNQMP_PM_REQUEST_ACK_NON_BLOCKING, }; +enum pm_node_id { + NODE_SD_0 = 39, + NODE_SD_1, +}; + +enum tap_delay_type { + PM_TAPDELAY_INPUT = 0, + PM_TAPDELAY_OUTPUT, +}; + /** * struct zynqmp_pm_query_data - PM query data * @qid: query ID -- cgit v1.2.3 From ec11e5c213cc20cac5e8310728b06793448b9f6d Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Tue, 12 Nov 2019 05:47:53 -0700 Subject: PCI: vmd: Add device id for VMD device 8086:9A0B This patch adds support for this VMD device which supports the bus restriction mode. Signed-off-by: Jon Derrick Signed-off-by: Lorenzo Pieralisi --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 21a572469a4e..2302d133af6f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3006,6 +3006,7 @@ #define PCI_DEVICE_ID_INTEL_84460GX 0x84ea #define PCI_DEVICE_ID_INTEL_IXP4XX 0x8500 #define PCI_DEVICE_ID_INTEL_IXP2800 0x9004 +#define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_VENDOR_ID_SCALEMP 0x8686 -- cgit v1.2.3 From 331f63457165a30c708280de2c77f1742c6351dc Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 30 Oct 2019 17:30:57 -0500 Subject: PCI: of: Add inbound resource parsing to helpers Extend devm_of_pci_get_host_bridge_resources() and pci_parse_request_of_pci_ranges() helpers to also parse the inbound addresses from DT 'dma-ranges' and populate a resource list with the translated addresses. This will help ensure 'dma-ranges' is always parsed in a consistent way. Tested-by: Srinath Mannam Tested-by: Thomas Petazzoni # for AArdvark Signed-off-by: Rob Herring Signed-off-by: Lorenzo Pieralisi Reviewed-by: Srinath Mannam Reviewed-by: Andrew Murray Acked-by: Gustavo Pimentel Cc: Jingoo Han Cc: Gustavo Pimentel Cc: Lorenzo Pieralisi Cc: Bjorn Helgaas Cc: Thomas Petazzoni Cc: Will Deacon Cc: Linus Walleij Cc: Toan Le Cc: Ley Foon Tan Cc: Tom Joseph Cc: Ray Jui Cc: Scott Branden Cc: bcm-kernel-feedback-list@broadcom.com Cc: Ryder Lee Cc: Karthikeyan Mitran Cc: Hou Zhiqiang Cc: Simon Horman Cc: Shawn Lin Cc: Heiko Stuebner Cc: Michal Simek Cc: rfi@lists.rocketboards.org Cc: linux-mediatek@lists.infradead.org Cc: linux-renesas-soc@vger.kernel.org Cc: linux-rockchip@lists.infradead.org --- include/linux/pci.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..5cb94916eaa1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2278,6 +2278,7 @@ struct irq_domain; struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus); int pci_parse_request_of_pci_ranges(struct device *dev, struct list_head *resources, + struct list_head *ib_resources, struct resource **bus_range); /* Arch may override this (weak) */ @@ -2286,9 +2287,11 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus); #else /* CONFIG_OF */ static inline struct irq_domain * pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; } -static inline int pci_parse_request_of_pci_ranges(struct device *dev, - struct list_head *resources, - struct resource **bus_range) +static inline int +pci_parse_request_of_pci_ranges(struct device *dev, + struct list_head *resources, + struct list_head *ib_resources, + struct resource **bus_range) { return -EINVAL; } -- cgit v1.2.3 From 56e35f9c5b87ec1ae93e483284e189c84388de16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Nov 2019 18:03:11 +0100 Subject: dma-mapping: drop the dev argument to arch_sync_dma_for_* These are pure cache maintainance routines, so drop the unused struct device argument. Signed-off-by: Christoph Hellwig Suggested-by: Daniel Vetter --- include/linux/dma-noncoherent.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index e30fca1f1b12..ca9b5770caee 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -73,29 +73,29 @@ static inline void arch_dma_cache_sync(struct device *dev, void *vaddr, #endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE -void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir); +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); #else -static inline void arch_sync_dma_for_device(struct device *dev, - phys_addr_t paddr, size_t size, enum dma_data_direction dir) +static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { } #endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir); +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); #else -static inline void arch_sync_dma_for_cpu(struct device *dev, - phys_addr_t paddr, size_t size, enum dma_data_direction dir) +static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { } #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL -void arch_sync_dma_for_cpu_all(struct device *dev); +void arch_sync_dma_for_cpu_all(void); #else -static inline void arch_sync_dma_for_cpu_all(struct device *dev) +static inline void arch_sync_dma_for_cpu_all(void) { } #endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ -- cgit v1.2.3 From 130c1ccbf55330b55e82612a6e54eebb82c9d746 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2019 17:06:04 +0100 Subject: dma-direct: unify the dma_capable definitions Currently each architectures that wants to override dma_to_phys and phys_to_dma also has to provide dma_capable. But there isn't really any good reason for that. powerpc and mips just have copies of the generic one minus the latests fix, and the arm one was the inspiration for said fix, but misses the bus_dma_mask handling. Make all architectures use the generic version instead. Signed-off-by: Christoph Hellwig Acked-by: Michael Ellerman (powerpc) Reviewed-by: Nicolas Saenz Julienne --- include/linux/dma-direct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 6db863c3eb93..991f8aa2676e 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -24,6 +24,7 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } +#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { @@ -38,7 +39,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); } -#endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ #ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED bool force_dma_unencrypted(struct device *dev); -- cgit v1.2.3 From c7345159f7db6fb69ec1c3b3f8f28cd05c731be2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Nov 2019 17:07:43 +0100 Subject: dma-direct: avoid a forward declaration for phys_to_dma Move dma_capable down a bit so that we don't need a forward declaration for phys_to_dma. Signed-off-by: Christoph Hellwig Reviewed-by: Nicolas Saenz Julienne --- include/linux/dma-direct.h | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 991f8aa2676e..f8959f75e496 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -6,8 +6,6 @@ #include /* for min_low_pfn */ #include -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); - #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else @@ -26,20 +24,6 @@ static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - dma_addr_t end = addr + size - 1; - - if (!dev->dma_mask) - return false; - - if (!IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && - min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) - return false; - - return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); -} - #ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED bool force_dma_unencrypted(struct device *dev); #else @@ -65,6 +49,20 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return __sme_clr(__dma_to_phys(dev, daddr)); } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + dma_addr_t end = addr + size - 1; + + if (!dev->dma_mask) + return false; + + if (!IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && + min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) + return false; + + return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); +} + u64 dma_direct_get_required_mask(struct device *dev); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -- cgit v1.2.3 From 68a33b1794665ba8a1d1ef1d3bfcc7c587d380a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2019 17:38:58 +0100 Subject: dma-direct: exclude dma_direct_map_resource from the min_low_pfn check The valid memory address check in dma_capable only makes sense when mapping normal memory, not when using dma_map_resource to map a device resource. Add a new boolean argument to dma_capable to exclude that check for the dma_map_resource case. Fixes: b12d66278dd6 ("dma-direct: check for overflows on 32 bit DMA addresses") Reported-by: Marek Szyprowski Signed-off-by: Christoph Hellwig Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski --- include/linux/dma-direct.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index f8959f75e496..99b77dd5f79b 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -49,14 +49,15 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return __sme_clr(__dma_to_phys(dev, daddr)); } -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, + bool is_ram) { dma_addr_t end = addr + size - 1; if (!dev->dma_mask) return false; - if (!IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && + if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; -- cgit v1.2.3 From 91e6015b082b08a74e5d9d326f651e5890a93519 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Nov 2019 22:38:16 +0100 Subject: bpf: Emit audit messages upon successful prog load and unload Allow for audit messages to be emitted upon BPF program load and unload for having a timeline of events. The load itself is in syscall context, so additional info about the process initiating the BPF prog creation can be logged and later directly correlated to the unload event. The only info really needed from BPF side is the globally unique prog ID where then audit user space tooling can query / dump all info needed about the specific BPF program right upon load event and enrich the record, thus these changes needed here can be kept small and non-intrusive to the core. Raw example output: # auditctl -D # auditctl -a always,exit -F arch=x86_64 -S bpf # ausearch --start recent -m 1334 [...] ---- time->Wed Nov 20 12:45:51 2019 type=PROCTITLE msg=audit(1574271951.590:8974): proctitle="./test_verifier" type=SYSCALL msg=audit(1574271951.590:8974): arch=c000003e syscall=321 success=yes exit=14 a0=5 a1=7ffe2d923e80 a2=78 a3=0 items=0 ppid=742 pid=949 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=2 comm="test_verifier" exe="/root/bpf-next/tools/testing/selftests/bpf/test_verifier" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1334] msg=audit(1574271951.590:8974): auid=0 uid=0 gid=0 ses=2 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 pid=949 comm="test_verifier" exe="/root/bpf-next/tools/testing/selftests/bpf/test_verifier" prog-id=3260 event=LOAD ---- time->Wed Nov 20 12:45:51 2019 type=UNKNOWN[1334] msg=audit(1574271951.590:8975): prog-id=3260 event=UNLOAD ---- [...] Signed-off-by: Daniel Borkmann Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191120213816.8186-1-jolsa@kernel.org --- include/linux/audit.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index aee3dc9eb378..edd006f4597d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -159,6 +159,7 @@ extern void audit_log_key(struct audit_buffer *ab, extern void audit_log_link_denied(const char *operation); extern void audit_log_lost(const char *message); +extern void audit_log_task(struct audit_buffer *ab); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); @@ -219,6 +220,8 @@ static inline void audit_log_key(struct audit_buffer *ab, char *key) { } static inline void audit_log_link_denied(const char *string) { } +static inline void audit_log_task(struct audit_buffer *ab) +{ } static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; -- cgit v1.2.3 From 196e8ca74886c433dcfc64a809707074b936aaf5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Nov 2019 23:04:44 +0100 Subject: bpf: Switch bpf_map_{area_alloc,area_mmapable_alloc}() to u64 size Given we recently extended the original bpf_map_area_alloc() helper in commit fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY"), we need to apply the same logic as in ff1c08e1f74b ("bpf: Change size to u64 for bpf_map_{area_alloc, charge_init}()"). To avoid conflicts, extend it for bpf-next. Reported-by: Stephen Rothwell Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e913dd5946ae..e89e86122233 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -794,12 +794,12 @@ void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size); +int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); void bpf_map_charge_finish(struct bpf_map_memory *mem); void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src); -void *bpf_map_area_alloc(size_t size, int numa_node); -void *bpf_map_area_mmapable_alloc(size_t size, int numa_node); +void *bpf_map_area_alloc(u64 size, int numa_node); +void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); -- cgit v1.2.3 From 12bcae44bf48595c71898330076576075590e15b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 7 Oct 2019 07:52:28 -0500 Subject: PCI/PM: Use pci_WARN() to include device information Add and use pci_WARN() wrappers so warnings include device information. Link: https://lore.kernel.org/r/20191017212851.54237-3-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki --- include/linux/pci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..4846306d521c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2400,4 +2400,12 @@ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #define pci_info_ratelimited(pdev, fmt, arg...) \ dev_info_ratelimited(&(pdev)->dev, fmt, ##arg) +#define pci_WARN(pdev, condition, fmt, arg...) \ + WARN(condition, "%s %s: " fmt, \ + dev_driver_string(&(pdev)->dev), pci_name(pdev), ##arg) + +#define pci_WARN_ONCE(pdev, condition, fmt, arg...) \ + WARN_ONCE(condition, "%s %s: " fmt, \ + dev_driver_string(&(pdev)->dev), pci_name(pdev), ##arg) + #endif /* LINUX_PCI_H */ -- cgit v1.2.3 From 89cdbc3546354c359558a1809133902028c57da4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 31 Oct 2019 17:53:04 -0500 Subject: PCI/PM: Remove unused pci_driver.resume_early() hook The struct pci_driver.resume_early() hook is one of the legacy PCI power management callbacks, and there are no remaining users of it. Remove it. Link: https://lore.kernel.org/r/20191101204558.210235-6-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4846306d521c..dd4596fc1208 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -806,7 +806,6 @@ struct module; * context, so it can sleep. * @suspend: Put device into low power state. * @suspend_late: Put device into low power state. - * @resume_early: Wake device from low power state. * @resume: Wake device from low power state. * (Please see Documentation/power/pci.rst for descriptions * of PCI Power Management and the related functions.) @@ -830,7 +829,6 @@ struct pci_driver { void (*remove)(struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ int (*suspend)(struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*suspend_late)(struct pci_dev *dev, pm_message_t state); - int (*resume_early)(struct pci_dev *dev); int (*resume)(struct pci_dev *dev); /* Device woken up */ void (*shutdown)(struct pci_dev *dev); int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ -- cgit v1.2.3 From 1a1daf097e21e544dd3e7c0ff620d78a9795fbf2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 31 Oct 2019 17:37:54 -0500 Subject: PCI/PM: Remove unused pci_driver.suspend_late() hook The struct pci_driver.suspend_late() hook is one of the legacy PCI power management callbacks, and there are no remaining users of it. Remove it. Link: https://lore.kernel.org/r/20191101204558.210235-7-helgaas@kernel.org Signed-off-by: Bjorn Helgaas Reviewed-by: Rafael J. Wysocki --- include/linux/pci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index dd4596fc1208..9b0e35e09874 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -805,7 +805,6 @@ struct module; * The remove function always gets called from process * context, so it can sleep. * @suspend: Put device into low power state. - * @suspend_late: Put device into low power state. * @resume: Wake device from low power state. * (Please see Documentation/power/pci.rst for descriptions * of PCI Power Management and the related functions.) @@ -828,7 +827,6 @@ struct pci_driver { int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ void (*remove)(struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ int (*suspend)(struct pci_dev *dev, pm_message_t state); /* Device suspended */ - int (*suspend_late)(struct pci_dev *dev, pm_message_t state); int (*resume)(struct pci_dev *dev); /* Device woken up */ void (*shutdown)(struct pci_dev *dev); int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */ -- cgit v1.2.3 From d6aa37cd04fdafaf31ae89691e537535df43ca78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Nov 2019 11:30:36 +0100 Subject: PCI/PM: Avoid exporting __pci_complete_power_transition() Notice that radeon_set_suspend(), which is the only caller of __pci_complete_power_transition() outside of pci.c, really only cares about the pci_platform_power_transition() invoked by it, so export the latter instead of it, update the radeon driver to call pci_platform_power_transition() directly and make __pci_complete_power_transition() static. Code rearrangement, no intentional functional impact. Link: https://lore.kernel.org/r/1731661.ykamz2Tiuf@kreacher Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 9b0e35e09874..86976cccdfe3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1228,7 +1228,7 @@ struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev, int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size); int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size); -int __pci_complete_power_transition(struct pci_dev *dev, pci_power_t state); +int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); -- cgit v1.2.3 From eb01fedc3d539f9443082aa2384c5d1ca26ed5c1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 20 Nov 2019 18:32:25 -0500 Subject: ftrace: Return ENOTSUPP when DYNAMIC_FTRACE_WITH_DIRECT_CALLS is not configured When CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS is not set it's best to have the stub functions return ENOTSUPP instead of ENODEV, otherwise ENODEV is a valid error when ip is incorrect which is indistinguishable from ftrace not compiled in. Link: http://lkml.kernel.org/r/CAADnVQ+OzTikM9EhrfsC7NFsVYhATW1SVHxK64w3xn9qpk81pg@mail.gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dfaa37e1943d..af79b0f8cdc1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -268,16 +268,16 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) { - return -ENODEV; + return -ENOTSUPP; } static inline int unregister_ftrace_direct(unsigned long ip, unsigned long addr) { - return -ENODEV; + return -ENOTSUPP; } static inline int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr) { - return -ENODEV; + return -ENOTSUPP; } static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) { -- cgit v1.2.3 From f3c9a666b28572b1a0ae691a47d9a7de4d9cefb3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Nov 2019 12:29:59 +0000 Subject: net: sfp: soft status and control support Add support for the soft status and control register, which allows TX_FAULT and RX_LOS to be monitored and TX_DISABLE to be set. We make use of this when the board does not support GPIOs for these signals. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/sfp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 3b35efd85bb1..487fd9412d10 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -428,6 +428,10 @@ enum { SFP_TEC_CUR = 0x6c, SFP_STATUS = 0x6e, + SFP_STATUS_TX_DISABLE = BIT(7), + SFP_STATUS_TX_DISABLE_FORCE = BIT(6), + SFP_STATUS_TX_FAULT = BIT(2), + SFP_STATUS_RX_LOS = BIT(1), SFP_ALARM0 = 0x70, SFP_ALARM0_TEMP_HIGH = BIT(7), SFP_ALARM0_TEMP_LOW = BIT(6), -- cgit v1.2.3 From 74722bb223d0f236303b60c9509ff924a9713780 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 21 Nov 2019 03:44:26 +0100 Subject: sched/vtime: Bring up complete kcpustat accessor Many callsites want to fetch the values of system, user, user_nice, guest or guest_nice kcpustat fields altogether or at least a pair of these. In that case calling kcpustat_field() for each requested field brings unecessary overhead when we could fetch all of them in a row. So provide kcpustat_cpu_fetch() that fetches the whole kcpustat array in a vtime safe way under the same RCU and seqcount block. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Wanpeng Li Cc: Yauheni Kaliuta Link: https://lkml.kernel.org/r/20191121024430.19938-3-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 79781196eb25..89f0745c096d 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -81,12 +81,19 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu) #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern u64 kcpustat_field(struct kernel_cpustat *kcpustat, enum cpu_usage_stat usage, int cpu); +extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu); #else static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat, enum cpu_usage_stat usage, int cpu) { return kcpustat->cpustat[usage]; } + +static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) +{ + *dst = kcpustat_cpu(cpu); +} + #endif extern void account_user_time(struct task_struct *, u64); -- cgit v1.2.3 From c9b465683a554212c3dd92915ed2088849c513bf Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 19 Nov 2019 13:45:45 +0100 Subject: platform/chrome: cros_ec: Put docs with the code To avoid doc rot, put function documentations with code, not header. Use kernel-doc style comments for exported functions. Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_proto.h | 103 ---------------------------- 1 file changed, 103 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index eab7036cda09..0d4e4aaed37a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -187,133 +187,30 @@ struct cros_ec_platform { u16 cmd_offset; }; -/** - * cros_ec_suspend() - Handle a suspend operation for the ChromeOS EC device. - * @ec_dev: Device to suspend. - * - * This can be called by drivers to handle a suspend event. - * - * Return: 0 on success or negative error code. - */ int cros_ec_suspend(struct cros_ec_device *ec_dev); -/** - * cros_ec_resume() - Handle a resume operation for the ChromeOS EC device. - * @ec_dev: Device to resume. - * - * This can be called by drivers to handle a resume event. - * - * Return: 0 on success or negative error code. - */ int cros_ec_resume(struct cros_ec_device *ec_dev); -/** - * cros_ec_prepare_tx() - Prepare an outgoing message in the output buffer. - * @ec_dev: Device to register. - * @msg: Message to write. - * - * This is intended to be used by all ChromeOS EC drivers, but at present - * only SPI uses it. Once LPC uses the same protocol it can start using it. - * I2C could use it now, with a refactor of the existing code. - * - * Return: 0 on success or negative error code. - */ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -/** - * cros_ec_check_result() - Check ec_msg->result. - * @ec_dev: EC device. - * @msg: Message to check. - * - * This is used by ChromeOS EC drivers to check the ec_msg->result for - * errors and to warn about them. - * - * Return: 0 on success or negative error code. - */ int cros_ec_check_result(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -/** - * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC. - * @ec_dev: EC device. - * @msg: Message to write. - * - * Call this to send a command to the ChromeOS EC. This should be used - * instead of calling the EC's cmd_xfer() callback directly. - * - * Return: 0 on success or negative error code. - */ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -/** - * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC. - * @ec_dev: EC device. - * @msg: Message to write. - * - * This function is identical to cros_ec_cmd_xfer, except it returns success - * status only if both the command was transmitted successfully and the EC - * replied with success status. It's not necessary to check msg->result when - * using this function. - * - * Return: The number of bytes transferred on success or negative error code. - */ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); -/** - * cros_ec_register() - Register a new ChromeOS EC, using the provided info. - * @ec_dev: Device to register. - * - * Before calling this, allocate a pointer to a new device and then fill - * in all the fields up to the --private-- marker. - * - * Return: 0 on success or negative error code. - */ int cros_ec_register(struct cros_ec_device *ec_dev); -/** - * cros_ec_unregister() - Remove a ChromeOS EC. - * @ec_dev: Device to unregister. - * - * Call this to deregister a ChromeOS EC, then clean up any private data. - * - * Return: 0 on success or negative error code. - */ int cros_ec_unregister(struct cros_ec_device *ec_dev); -/** - * cros_ec_query_all() - Query the protocol version supported by the - * ChromeOS EC. - * @ec_dev: Device to register. - * - * Return: 0 on success or negative error code. - */ int cros_ec_query_all(struct cros_ec_device *ec_dev); -/** - * cros_ec_get_next_event() - Fetch next event from the ChromeOS EC. - * @ec_dev: Device to fetch event from. - * @wake_event: Pointer to a bool set to true upon return if the event might be - * treated as a wake event. Ignored if null. - * - * Return: negative error code on errors; 0 for no data; or else number of - * bytes received (i.e., an event was retrieved successfully). Event types are - * written out to @ec_dev->event_data.event_type on success. - */ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); -/** - * cros_ec_get_host_event() - Return a mask of event set by the ChromeOS EC. - * @ec_dev: Device to fetch event from. - * - * When MKBP is supported, when the EC raises an interrupt, we collect the - * events raised and call the functions in the ec notifier. This function - * is a helper to know which events are raised. - * - * Return: 0 on error or non-zero bitmask of one or more EC_HOST_EVENT_*. - */ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); #endif /* __LINUX_CROS_EC_PROTO_H */ -- cgit v1.2.3 From a16b2e28190255a0729c27902fa88fb8fff39bb0 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 19 Nov 2019 13:45:45 +0100 Subject: mfd / platform: cros_ec: Add sensor_count and make check_features public Add a new function to return the number of MEMS sensors available in a ChromeOS Embedded Controller. It uses MOTIONSENSE_CMD_DUMP if available or a specific memory map ACPI registers to find out. Also, make check_features public as it can be useful for other drivers to know what the Embedded Controller supports. Signed-off-by: Gwendal Grignou Acked-by: Lee Jones Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_proto.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 0d4e4aaed37a..f3de0662135d 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -12,6 +12,7 @@ #include #include +#include #include #define CROS_EC_DEV_NAME "cros_ec" @@ -213,4 +214,8 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); +int cros_ec_check_features(struct cros_ec_dev *ec, int feature); + +int cros_ec_get_sensor_count(struct cros_ec_dev *ec); + #endif /* __LINUX_CROS_EC_PROTO_H */ -- cgit v1.2.3 From 53067471188c4066fc393ab892d0a74482eac000 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 19 Nov 2019 13:45:45 +0100 Subject: iio / platform: cros_ec: Add cros-ec-sensorhub driver Similar to HID sensor stack, the new driver sits between cros-ec-dev and the IIO device drivers: The EC based IIO device topology would be: iio:device1 -> ...0/0000:00:1f.0/PNP0C09:00/GOOG0004:00/cros-ec-dev.6.auto/ cros-ec-sensorhub.7.auto/ cros-ec-accel.15.auto/ iio:device1 It will be expanded to control EC sensor FIFO. Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron [Fix "unknown type name 'uint32_t'" type errors] Reported-by: kbuild test robot Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_sensorhub.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/platform_data/cros_ec_sensorhub.h (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h new file mode 100644 index 000000000000..5f6f9bb65079 --- /dev/null +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Chrome OS EC MEMS Sensor Hub driver. + * + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H +#define __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H + +#include + +/** + * struct cros_ec_sensorhub - Sensor Hub device data. + * + * @ec: Embedded Controller where the hub is located. + */ +struct cros_ec_sensorhub { + struct cros_ec_dev *ec; +}; + +#endif /* __LINUX_PLATFORM_DATA_CROS_EC_SENSORHUB_H */ -- cgit v1.2.3 From d60ac88a62df71cb12b2d60d2dae5658fb4eab43 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 19 Nov 2019 13:45:45 +0100 Subject: mfd / platform / iio: cros_ec: Register sensor through sensorhub Remove the duplicated code in MFD, since MFD just registers cros-ec-sensorhub if at least one sensor is present. Change IIO cros-ec driver to get the pointer to the cros-ec-dev through cros-ec-sensorhub. Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Acked-by: Lee Jones Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_proto.h | 8 -------- include/linux/platform_data/cros_ec_sensorhub.h | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index f3de0662135d..691f9e953a96 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -168,14 +168,6 @@ struct cros_ec_device { struct platform_device *pd; }; -/** - * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information. - * @sensor_num: Id of the sensor, as reported by the EC. - */ -struct cros_ec_sensor_platform { - u8 sensor_num; -}; - /** * struct cros_ec_platform - ChromeOS EC platform information. * @ec_name: Name of EC device (e.g. 'cros-ec', 'cros-pd', ...) diff --git a/include/linux/platform_data/cros_ec_sensorhub.h b/include/linux/platform_data/cros_ec_sensorhub.h index 5f6f9bb65079..bef7ffc7fce1 100644 --- a/include/linux/platform_data/cros_ec_sensorhub.h +++ b/include/linux/platform_data/cros_ec_sensorhub.h @@ -10,6 +10,14 @@ #include +/** + * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information. + * @sensor_num: Id of the sensor, as reported by the EC. + */ +struct cros_ec_sensor_platform { + u8 sensor_num; +}; + /** * struct cros_ec_sensorhub - Sensor Hub device data. * -- cgit v1.2.3 From 05a3c420eaa6857cb20afe7e3a3c39ed94a3b2c1 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 19 Nov 2019 13:45:46 +0100 Subject: platform/chrome: cros-ec: Record event timestamp in the hard irq To improve sensor timestamp precision, given EC and AP are in different time domains, the AP needs to try to record the exact moment an event was signalled to the AP by the EC as soon as possible after it happens. First thing in the hard irq is the best place for this. Signed-off-by: Gwendal Grignou Acked-by: Jonathan Cameron Acked-by: Lee Jones Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_proto.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 691f9e953a96..02dc34f366d7 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -122,6 +122,8 @@ struct cros_ec_command { * @event_data: Raw payload transferred with the MKBP event. * @event_size: Size in bytes of the event data. * @host_event_wake_mask: Mask of host events that cause wake from suspend. + * @last_event_time: exact time from the hard irq when we got notified of + * a new event. * @ec: The platform_device used by the mfd driver to interface with the * main EC. * @pd: The platform_device used by the mfd driver to interface with the @@ -162,6 +164,7 @@ struct cros_ec_device { int event_size; u32 host_event_wake_mask; u32 last_resume_result; + ktime_t last_event_time; /* The platform devices used by the mfd driver */ struct platform_device *ec; @@ -210,4 +213,17 @@ int cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); +/** + * cros_ec_get_time_ns() - Return time in ns. + * + * This is the function used to record the time for last_event_time in struct + * cros_ec_device during the hard irq. + * + * Return: ktime_t format since boot. + */ +static inline ktime_t cros_ec_get_time_ns(void) +{ + return ktime_get_boottime_ns(); +} + #endif /* __LINUX_CROS_EC_PROTO_H */ -- cgit v1.2.3 From 3300fdd630d4d3d96e3ba9af63a740d3a4e8fc61 Mon Sep 17 00:00:00 2001 From: Enrico Granata Date: Tue, 19 Nov 2019 13:45:46 +0100 Subject: platform/chrome: cros_ec: handle MKBP more events flag The ChromeOS EC has support for signaling to the host that a single IRQ can serve multiple MKBP (Matrix KeyBoard Protocol) events. Doing this serves an optimization purpose, as it minimizes the number of round-trips into the interrupt handling machinery, and it proves beneficial to sensor timestamping as it keeps the desired synchronization of event times between the two processors. This patch adds kernel support for this EC feature, allowing the ec_irq to loop until all events have been served. Signed-off-by: Enrico Granata Signed-off-by: Gwendal Grignou Reviewed-by: Jonathan Cameron Acked-by: Lee Jones Signed-off-by: Enric Balletbo i Serra --- include/linux/platform_data/cros_ec_proto.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 02dc34f366d7..30098a551523 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -116,7 +116,9 @@ struct cros_ec_command { * code. * @pkt_xfer: Send packet to EC and get response. * @lock: One transaction at a time. - * @mkbp_event_supported: True if this EC supports the MKBP event protocol. + * @mkbp_event_supported: 0 if MKBP not supported. Otherwise its value is + * the maximum supported version of the MKBP host event + * command + 1. * @host_sleep_v1: True if this EC supports the sleep v1 command. * @event_notifier: Interrupt event notifier for transport devices. * @event_data: Raw payload transferred with the MKBP event. @@ -156,7 +158,7 @@ struct cros_ec_device { int (*pkt_xfer)(struct cros_ec_device *ec, struct cros_ec_command *msg); struct mutex lock; - bool mkbp_event_supported; + u8 mkbp_event_supported; bool host_sleep_v1; struct blocking_notifier_head event_notifier; @@ -205,7 +207,9 @@ int cros_ec_unregister(struct cros_ec_device *ec_dev); int cros_ec_query_all(struct cros_ec_device *ec_dev); -int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); +int cros_ec_get_next_event(struct cros_ec_device *ec_dev, + bool *wake_event, + bool *has_more_events); u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); @@ -213,6 +217,8 @@ int cros_ec_check_features(struct cros_ec_dev *ec, int feature); int cros_ec_get_sensor_count(struct cros_ec_dev *ec); +bool cros_ec_handle_event(struct cros_ec_device *ec_dev); + /** * cros_ec_get_time_ns() - Return time in ns. * -- cgit v1.2.3 From 4250b047039d324e0ff65267c8beb5bad5052a86 Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Thu, 21 Nov 2019 19:20:21 +0900 Subject: debugfs: Fix !DEBUG_FS debugfs_create_automount If DEBUG_FS=n, compile fails with the following error: kernel/trace/trace.c: In function 'tracing_init_dentry': kernel/trace/trace.c:8658:9: error: passing argument 3 of 'debugfs_create_automount' from incompatible pointer type [-Werror=incompatible-pointer-types] 8658 | trace_automount, NULL); | ^~~~~~~~~~~~~~~ | | | struct vfsmount * (*)(struct dentry *, void *) In file included from kernel/trace/trace.c:24: ./include/linux/debugfs.h:206:25: note: expected 'struct vfsmount * (*)(void *)' but argument is of type 'struct vfsmount * (*)(struct dentry *, void *)' 206 | struct vfsmount *(*f)(void *), | ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~ Signed-off-by: Kusanagi Kouichi Link: https://lore.kernel.org/r/20191121102021787.MLMY.25002.ppp.dion.ne.jp@dmta0003.auone-net.jp Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 0e8f2e0cb91f..bf9b6cafa4c2 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -54,6 +54,8 @@ static const struct file_operations __fops = { \ .llseek = no_llseek, \ } +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_lookup(const char *name, struct dentry *parent); @@ -75,7 +77,6 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); -typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, @@ -203,7 +204,7 @@ static inline struct dentry *debugfs_create_symlink(const char *name, static inline struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From bbd8810d399812f2016713565e4d8ff8f1508aa6 Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczynski Date: Tue, 3 Sep 2019 13:30:59 +0200 Subject: PCI: Remove unused includes and superfluous struct declaration Remove and from being included directly as part of the include/linux/of_pci.h, and remove superfluous declaration of struct of_phandle_args. Move users of include to include and directly rather than rely on both being included transitively through . Link: https://lore.kernel.org/r/20190903113059.2901-1-kw@linux.com Signed-off-by: Krzysztof Wilczynski Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring --- include/linux/of_pci.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 21a89c4880fa..29658c0ee71f 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -2,11 +2,10 @@ #ifndef __OF_PCI_H #define __OF_PCI_H -#include -#include +#include +#include struct pci_dev; -struct of_phandle_args; struct device_node; #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_PCI) -- cgit v1.2.3 From b6866318657717c8914673a6394894d12bc9ff5e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 21 Nov 2019 13:40:26 +0300 Subject: block: add iostat counters for flush requests Requests that triggers flushing volatile writeback cache to disk (barriers) have significant effect to overall performance. Block layer has sophisticated engine for combining several flush requests into one. But there is no statistics for actual flushes executed by disk. Requests which trigger flushes usually are barriers - zero-size writes. This patch adds two iostat counters into /sys/class/block/$dev/stat and /proc/diskstats - count of completed flush requests and their total time. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 23a2fd534817..70254ae11769 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -377,6 +377,7 @@ enum stat_group { STAT_READ, STAT_WRITE, STAT_DISCARD, + STAT_FLUSH, NR_STAT_GROUPS }; -- cgit v1.2.3 From a7ba70f1787f977f970cd116076c6fce4b9e01cc Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 21 Nov 2019 10:26:44 +0100 Subject: dma-mapping: treat dev->bus_dma_mask as a DMA limit Using a mask to represent bus DMA constraints has a set of limitations. The biggest one being it can only hold a power of two (minus one). The DMA mapping code is already aware of this and treats dev->bus_dma_mask as a limit. This quirk is already used by some architectures although still rare. With the introduction of the Raspberry Pi 4 we've found a new contender for the use of bus DMA limits, as its PCIe bus can only address the lower 3GB of memory (of a total of 4GB). This is impossible to represent with a mask. To make things worse the device-tree code rounds non power of two bus DMA limits to the next power of two, which is unacceptable in this case. In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all over the tree and treat it as such. Note that dev->bus_dma_limit should contain the higher accessible DMA address. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- include/linux/device.h | 6 +++--- include/linux/dma-direct.h | 2 +- include/linux/dma-mapping.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 297239a08bb7..e396de656f20 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1186,8 +1186,8 @@ struct dev_links_info { * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all * hardware supports 64-bit addresses for consistent allocations * such descriptors. - * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA - * limit than the device itself supports. + * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller + * DMA limit than the device itself supports. * @dma_pfn_offset: offset of DMA memory range relatively of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. @@ -1270,7 +1270,7 @@ struct device { not all hardware supports 64 bit addresses for consistent allocations such descriptors. */ - u64 bus_dma_mask; /* upstream dma_mask constraint */ + u64 bus_dma_limit; /* upstream dma constraint */ unsigned long dma_pfn_offset; struct device_dma_parameters *dma_parms; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 452f5280cde3..24b8684aa21d 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -63,7 +63,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; - return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_mask); + return end <= min_not_zero(*dev->dma_mask, dev->bus_dma_limit); } u64 dma_direct_get_required_mask(struct device *dev); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4d450672b7d6..c4d8741264bd 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -697,7 +697,7 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) */ static inline bool dma_addressing_limited(struct device *dev) { - return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) < + return min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < dma_get_required_mask(dev); } -- cgit v1.2.3 From 52deba0f02a98c150677a9c381cc1991a928bcff Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 15 Nov 2019 00:40:00 +0900 Subject: nvme: hwmon: provide temperature min and max values for each sensor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the NVMe specification, the over temperature threshold and under temperature threshold features shall be implemented for Composite Temperature if a non-zero WCTEMP field value is reported in the Identify Controller data structure. The features are also implemented for all implemented temperature sensors (i.e., all Temperature Sensor fields that report a non-zero value). This provides the over temperature threshold and under temperature threshold for each sensor as temperature min and max values of hwmon sysfs attributes. The WCTEMP is already provided as a temperature max value for Composite Temperature, but this change isn't incompatible. Because the default value of the over temperature threshold for Composite Temperature is the WCTEMP. Now the alarm attribute for Composite Temperature indicates one of the temperature is outside of a temperature threshold. Because there is only a single bit in Critical Warning field that indicates a temperature is outside of a threshold. Example output from the "sensors" command: nvme-pci-0100 Adapter: PCI adapter Composite: +33.9°C (low = -273.1°C, high = +69.8°C) (crit = +79.8°C) Sensor 1: +34.9°C (low = -273.1°C, high = +65261.8°C) Sensor 2: +31.9°C (low = -273.1°C, high = +65261.8°C) Sensor 5: +47.9°C (low = -273.1°C, high = +65261.8°C) This also adds helper macros for kelvin from/to milli Celsius conversion, and replaces the repeated code in hwmon.c. Cc: Keith Busch Cc: Jens Axboe Cc: Christoph Hellwig Cc: Sagi Grimberg Cc: Jean Delvare Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Akinobu Mita Signed-off-by: Keith Busch --- include/linux/nvme.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3eca4f7d8510..3d5189f46cb1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -804,6 +804,12 @@ struct nvme_write_zeroes_cmd { /* Features */ +enum { + NVME_TEMP_THRESH_MASK = 0xffff, + NVME_TEMP_THRESH_SELECT_SHIFT = 16, + NVME_TEMP_THRESH_TYPE_UNDER = 0x100000, +}; + struct nvme_feat_auto_pst { __le64 entries[32]; }; -- cgit v1.2.3 From 7599a896f2e46e9c072e02a8299a67d4d2f96675 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 21 Nov 2019 16:58:53 +0100 Subject: audit: Move audit_log_task declaration under CONFIG_AUDITSYSCALL The 0-DAY found that audit_log_task is not declared under CONFIG_AUDITSYSCALL which causes compilation error when it is not defined: kernel/bpf/syscall.o: In function `bpf_audit_prog.isra.30': >> syscall.c:(.text+0x860): undefined reference to `audit_log_task' Adding the audit_log_task declaration and stub within CONFIG_AUDITSYSCALL ifdef. Fixes: 91e6015b082b ("bpf: Emit audit messages upon successful prog load and unload") Reported-by: kbuild test robot Signed-off-by: Jiri Olsa Signed-off-by: David S. Miller --- include/linux/audit.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index edd006f4597d..18925d924c73 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -159,7 +159,6 @@ extern void audit_log_key(struct audit_buffer *ab, extern void audit_log_link_denied(const char *operation); extern void audit_log_lost(const char *message); -extern void audit_log_task(struct audit_buffer *ab); extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab); @@ -220,8 +219,6 @@ static inline void audit_log_key(struct audit_buffer *ab, char *key) { } static inline void audit_log_link_denied(const char *string) { } -static inline void audit_log_task(struct audit_buffer *ab) -{ } static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; @@ -361,6 +358,8 @@ static inline void audit_ptrace(struct task_struct *t) __audit_ptrace(t); } +extern void audit_log_task(struct audit_buffer *ab); + /* Private API (for audit.c only) */ extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); @@ -648,6 +647,9 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad) static inline void audit_ptrace(struct task_struct *t) { } + +static inline void audit_log_task(struct audit_buffer *ab) +{ } #define audit_n_rules 0 #define audit_signals 0 #endif /* CONFIG_AUDITSYSCALL */ -- cgit v1.2.3 From bedc61a922f9dbbe3bfb26ec2745f8cd63b57637 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 15 Oct 2019 13:46:44 +0200 Subject: Drivers: hv: vmbus: Introduce table of VMBus protocol versions The technique used to get the next VMBus version seems increasisly clumsy as the number of VMBus versions increases. Performance is not a concern since this is only done once during system boot; it's just that we'll end up with more lines of code than is really needed. As an alternative, introduce a table with the version numbers listed in order (from the most recent to the oldest). vmbus_connect() loops through the versions listed in the table until it gets an accepted connection or gets to the end of the table (invalid version). Suggested-by: Michael Kelley Signed-off-by: Andrea Parri Reviewed-by: Wei Liu Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- include/linux/hyperv.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b4a017093b69..c08b62dbd151 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -192,10 +192,6 @@ static inline u32 hv_get_avail_to_write_percent( #define VERSION_WIN10 ((4 << 16) | (0)) #define VERSION_WIN10_V5 ((5 << 16) | (0)) -#define VERSION_INVAL -1 - -#define VERSION_CURRENT VERSION_WIN10_V5 - /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) -- cgit v1.2.3 From 2d4f49b3e1e3a24ce16dfeeb2235688c8aba57ef Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Tue, 15 Oct 2019 13:46:45 +0200 Subject: Drivers: hv: vmbus: Enable VMBus protocol versions 4.1, 5.1 and 5.2 Hyper-V has added VMBus protocol versions 5.1 and 5.2 in recent release versions. Allow Linux guests to negotiate these new protocol versions on versions of Hyper-V that support them. While on this, also allow guests to negotiate the VMBus protocol version 4.1 (which was missing). Signed-off-by: Andrea Parri Reviewed-by: Wei Liu Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- include/linux/hyperv.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c08b62dbd151..f17f2cd22e39 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -182,15 +182,21 @@ static inline u32 hv_get_avail_to_write_percent( * 2 . 4 (Windows 8) * 3 . 0 (Windows 8 R2) * 4 . 0 (Windows 10) + * 4 . 1 (Windows 10 RS3) * 5 . 0 (Newer Windows 10) + * 5 . 1 (Windows 10 RS4) + * 5 . 2 (Windows Server 2019, RS5) */ #define VERSION_WS2008 ((0 << 16) | (13)) #define VERSION_WIN7 ((1 << 16) | (1)) #define VERSION_WIN8 ((2 << 16) | (4)) #define VERSION_WIN8_1 ((3 << 16) | (0)) -#define VERSION_WIN10 ((4 << 16) | (0)) +#define VERSION_WIN10 ((4 << 16) | (0)) +#define VERSION_WIN10_V4_1 ((4 << 16) | (1)) #define VERSION_WIN10_V5 ((5 << 16) | (0)) +#define VERSION_WIN10_V5_1 ((5 << 16) | (1)) +#define VERSION_WIN10_V5_2 ((5 << 16) | (2)) /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) -- cgit v1.2.3 From af9ca6f9bb16e446a44393a797d0ae74d356a5c7 Mon Sep 17 00:00:00 2001 From: Branden Bonaby Date: Thu, 3 Oct 2019 17:01:49 -0400 Subject: drivers: hv: vmbus: Introduce latency testing Introduce user specified latency in the packet reception path By exposing the test parameters as part of the debugfs channel attributes. We will control the testing state via these attributes. Signed-off-by: Branden Bonaby Reviewed-by: Michael Kelley Signed-off-by: Sasha Levin --- include/linux/hyperv.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f17f2cd22e39..26f3aeeae1ca 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -934,6 +934,21 @@ struct vmbus_channel { * full outbound ring buffer. */ u64 out_full_first; + + /* enabling/disabling fuzz testing on the channel (default is false)*/ + bool fuzz_testing_state; + + /* + * Interrupt delay will delay the guest from emptying the ring buffer + * for a specific amount of time. The delay is in microseconds and will + * be between 1 to a maximum of 1000, its default is 0 (no delay). + * The Message delay will delay guest reading on a per message basis + * in microseconds between 1 to 1000 with the default being 0 + * (no delay). + */ + u32 fuzz_testing_interrupt_delay; + u32 fuzz_testing_message_delay; + }; static inline bool is_hvsock_channel(const struct vmbus_channel *c) @@ -1182,6 +1197,10 @@ struct hv_device { struct vmbus_channel *channel; struct kset *channels_kset; + + /* place holder to keep track of the dir for hv device in debugfs */ + struct dentry *debug_dir; + }; -- cgit v1.2.3 From 30aad41721e087babcf27c5192474724d555936c Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:30:07 +0200 Subject: net/core: Add support for getting VF GUIDs Introduce a new ndo: ndo_get_vf_guid, to get from the net device the port and node GUID. New applications can choose to use this interface to show GUIDs with iproute2 with commands such as: - ip link show ib4 ib4: mtu 4092 qdisc noop state DOWN mode DEFAULT group default qlen 256 link/infiniband 00:00:0a:2d:fe:80:00:00:00:00:00:00:ec:0d:9a:03:00:44:36:8d brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff vf 0 link/infiniband 00:00:0a:2d:fe:80:00:00:00:00:00:00:ec:0d:9a:03:00:44:36:8d brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof checking off, NODE_GUID 22:44:33:00:33:11:00:33, PORT_GUID 10:21:33:12:00:11:22:10, link-state disable, trust off, query_rss off Signed-off-by: Danit Goldberg Acked-by: David Ahern Signed-off-by: Leon Romanovsky --- include/linux/netdevice.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9eda1c31d1f7..379338239e49 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1316,6 +1316,10 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_get_vf_guid)(struct net_device *dev, + int vf, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*ndo_set_vf_guid)(struct net_device *dev, int vf, u64 guid, int guid_type); -- cgit v1.2.3 From 677bf08cfdf9ee411c2084157f15d85edb09a81a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Nov 2019 06:56:23 +0100 Subject: udp: drop skb extensions before marking skb stateless Once udp stack has set the UDP_SKB_IS_STATELESS flag, later skb free assumes all skb head state has been dropped already. This will leak the extension memory in case the skb has extensions other than the ipsec secpath, e.g. bridge nf data. To fix this, set the UDP_SKB_IS_STATELESS flag only if we don't have extensions or if the extension space can be free'd. Fixes: 895b5c9f206eb7d25dc1360a ("netfilter: drop bridge nf reset from nf_reset") Cc: Paolo Abeni Reported-by: Byron Stanoszek Signed-off-by: Florian Westphal Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 64a395c7f689..8688f7adfda7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4169,12 +4169,18 @@ static inline void skb_ext_reset(struct sk_buff *skb) skb->active_extensions = 0; } } + +static inline bool skb_has_extensions(struct sk_buff *skb) +{ + return unlikely(skb->active_extensions); +} #else static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {} static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {} +static inline bool skb_has_extensions(struct sk_buff *skb) { return false; } #endif /* CONFIG_SKB_EXTENSIONS */ static inline void nf_reset_ct(struct sk_buff *skb) -- cgit v1.2.3 From a18fab48dbacbb7ff104a13e987778b7995bec07 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 28 Oct 2019 16:58:53 +0200 Subject: net/mlx5: DR, Add HW bits and definitions for Geneve flex parser Add definition for flex parser tunneling header for Geneve. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4f912d4e67bc..5d54fccf87fc 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1110,6 +1110,7 @@ enum { }; enum { + MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, -- cgit v1.2.3 From b41db132821fdad9d80a177344a47468791fbe62 Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Thu, 21 Nov 2019 14:38:15 +0100 Subject: ftrace: Use BIT() macro It's cleaner to use the BIT() macro instead of raw shift operation. Link: http://lkml.kernel.org/r/20191121133815.15040-1-info@metux.net Signed-off-by: Enrico Weigelt, metux IT consult [ Added BIT() for bits 16 and 17 ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index af79b0f8cdc1..232806d5689d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -149,24 +149,24 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * (internal ftrace only, should not be used by others) */ enum { - FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_DYNAMIC = 1 << 1, - FTRACE_OPS_FL_SAVE_REGS = 1 << 2, - FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 3, - FTRACE_OPS_FL_RECURSION_SAFE = 1 << 4, - FTRACE_OPS_FL_STUB = 1 << 5, - FTRACE_OPS_FL_INITIALIZED = 1 << 6, - FTRACE_OPS_FL_DELETED = 1 << 7, - FTRACE_OPS_FL_ADDING = 1 << 8, - FTRACE_OPS_FL_REMOVING = 1 << 9, - FTRACE_OPS_FL_MODIFYING = 1 << 10, - FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 11, - FTRACE_OPS_FL_IPMODIFY = 1 << 12, - FTRACE_OPS_FL_PID = 1 << 13, - FTRACE_OPS_FL_RCU = 1 << 14, - FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, - FTRACE_OPS_FL_PERMANENT = 1 << 16, - FTRACE_OPS_FL_DIRECT = 1 << 17, + FTRACE_OPS_FL_ENABLED = BIT(0), + FTRACE_OPS_FL_DYNAMIC = BIT(1), + FTRACE_OPS_FL_SAVE_REGS = BIT(2), + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = BIT(3), + FTRACE_OPS_FL_RECURSION_SAFE = BIT(4), + FTRACE_OPS_FL_STUB = BIT(5), + FTRACE_OPS_FL_INITIALIZED = BIT(6), + FTRACE_OPS_FL_DELETED = BIT(7), + FTRACE_OPS_FL_ADDING = BIT(8), + FTRACE_OPS_FL_REMOVING = BIT(9), + FTRACE_OPS_FL_MODIFYING = BIT(10), + FTRACE_OPS_FL_ALLOC_TRAMP = BIT(11), + FTRACE_OPS_FL_IPMODIFY = BIT(12), + FTRACE_OPS_FL_PID = BIT(13), + FTRACE_OPS_FL_RCU = BIT(14), + FTRACE_OPS_FL_TRACE_ARRAY = BIT(15), + FTRACE_OPS_FL_PERMANENT = BIT(16), + FTRACE_OPS_FL_DIRECT = BIT(17), }; #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 28879787147358e8ffcae397f11748de3dd26577 Mon Sep 17 00:00:00 2001 From: Divya Indi Date: Wed, 20 Nov 2019 11:08:38 -0800 Subject: tracing: Adding new functions for kernel access to Ftrace instances Adding 2 new functions - 1) struct trace_array *trace_array_get_by_name(const char *name); Return pointer to a trace array with given name. If it does not exist, create and return pointer to the new trace array. 2) int trace_array_set_clr_event(struct trace_array *tr, const char *system ,const char *event, bool enable); Enable/Disable events to this trace array. Additionally, - To handle reference counters, export trace_array_put() - Due to introduction of the above 2 new functions, we no longer need to export - ftrace_set_clr_event & trace_array_create APIs. Link: http://lkml.kernel.org/r/1574276919-11119-2-git-send-email-divya.indi@oracle.com Signed-off-by: Divya Indi Reviewed-by: Aruna Ramakrishna Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace.h | 3 ++- include/linux/trace_events.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index 24fcf07812ae..7fd86d3c691f 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -29,7 +29,8 @@ struct trace_array; void trace_printk_init_buffers(void); int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...); -struct trace_array *trace_array_create(const char *name); +void trace_array_put(struct trace_array *tr); +struct trace_array *trace_array_get_by_name(const char *name); int trace_array_destroy(struct trace_array *tr); #endif /* CONFIG_TRACING */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 60a41b7069dd..4c6e15605766 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -555,7 +555,8 @@ extern int trace_event_get_offsets(struct trace_event_call *call); int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); - +int trace_array_set_clr_event(struct trace_array *tr, const char *system, + const char *event, bool enable); /* * The double __builtin_constant_p is because gcc will give us an error * if we try to allocate the static variable to fmt if it is not a -- cgit v1.2.3 From 84bb46cd62283cc371769ec1f77ff7924099f584 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 23 Nov 2019 09:54:58 -0800 Subject: Revert "bpf: Emit audit messages upon successful prog load and unload" This commit reverts commit 91e6015b082b ("bpf: Emit audit messages upon successful prog load and unload") and its follow up commit 7599a896f2e4 ("audit: Move audit_log_task declaration under CONFIG_AUDITSYSCALL") as requested by Paul Moore. The change needs close review on linux-audit, tests etc. Signed-off-by: Jakub Kicinski --- include/linux/audit.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 18925d924c73..aee3dc9eb378 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -358,8 +358,6 @@ static inline void audit_ptrace(struct task_struct *t) __audit_ptrace(t); } -extern void audit_log_task(struct audit_buffer *ab); - /* Private API (for audit.c only) */ extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode); @@ -647,9 +645,6 @@ static inline void audit_ntp_log(const struct audit_ntp_data *ad) static inline void audit_ptrace(struct task_struct *t) { } - -static inline void audit_log_task(struct audit_buffer *ab) -{ } #define audit_n_rules 0 #define audit_signals 0 #endif /* CONFIG_AUDITSYSCALL */ -- cgit v1.2.3 From e3cf8b3668a808c1d252269ffc34a5723cfb9a7b Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 22 Nov 2019 12:37:08 +0000 Subject: net: phy: remove phy_ethtool_sset() There are no users of phy_ethtool_sset() in the kernel anymore, and as of commit 3c1bcc8614db ("net: ethernet: Convert phydev advertize and supported from u32 to link mode"), the implementation is slightly buggy - it doesn't correctly check the masked advertising mask as it used to. Remove it, and update the phy documentation to refer to its replacement function. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 124516fe2763..f5cdfb206097 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1160,7 +1160,6 @@ void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd); int phy_ethtool_ksettings_set(struct phy_device *phydev, -- cgit v1.2.3 From 99cb252f5e68d72afa3245a4e73d216d295cd335 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:19 -0400 Subject: mm/mmu_notifier: add an interval tree notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of the 13 users of mmu_notifiers, 8 of them use only invalidate_range_start/end() and immediately intersect the mmu_notifier_range with some kind of internal list of VAs. 4 use an interval tree (i915_gem, radeon_mn, umem_odp, hfi1). 4 use a linked list of some kind (scif_dma, vhost, gntdev, hmm) And the remaining 5 either don't use invalidate_range_start() or do some special thing with it. It turns out that building a correct scheme with an interval tree is pretty complicated, particularly if the use case is synchronizing against another thread doing get_user_pages(). Many of these implementations have various subtle and difficult to fix races. This approach puts the interval tree as common code at the top of the mmu notifier call tree and implements a shareable locking scheme. It includes: - An interval tree tracking VA ranges, with per-range callbacks - A read/write locking scheme for the interval tree that avoids sleeping in the notifier path (for OOM killer) - A sequence counter based collision-retry locking scheme to tell device page fault that a VA range is being concurrently invalidated. This is based on various ideas: - hmm accumulates invalidated VA ranges and releases them when all invalidates are done, via active_invalidate_ranges count. This approach avoids having to intersect the interval tree twice (as umem_odp does) at the potential cost of a longer device page fault. - kvm/umem_odp use a sequence counter to drive the collision retry, via invalidate_seq - a deferred work todo list on unlock scheme like RTNL, via deferred_list. This makes adding/removing interval tree members more deterministic - seqlock, except this version makes the seqlock idea multi-holder on the write side by protecting it with active_invalidate_ranges and a spinlock To minimize MM overhead when only the interval tree is being used, the entire SRCU and hlist overheads are dropped using some simple branches. Similarly the interval tree overhead is dropped when in hlist mode. The overhead from the mandatory spinlock is broadly the same as most of existing users which already had a lock (or two) of some sort on the invalidation path. Link: https://lore.kernel.org/r/20191112202231.3856-3-jgg@ziepe.ca Acked-by: Christian König Tested-by: Philip Yang Tested-by: Ralph Campbell Reviewed-by: John Hubbard Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/mmu_notifier.h | 101 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 12bd603d318c..9e6caa8ecd19 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -6,10 +6,12 @@ #include #include #include +#include struct mmu_notifier_mm; struct mmu_notifier; struct mmu_notifier_range; +struct mmu_interval_notifier; /** * enum mmu_notifier_event - reason for the mmu notifier callback @@ -32,6 +34,9 @@ struct mmu_notifier_range; * access flags). User should soft dirty the page in the end callback to make * sure that anyone relying on soft dirtyness catch pages that might be written * through non CPU mappings. + * + * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal + * that the mm refcount is zero and the range is no longer accessible. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, @@ -39,6 +44,7 @@ enum mmu_notifier_event { MMU_NOTIFY_PROTECTION_VMA, MMU_NOTIFY_PROTECTION_PAGE, MMU_NOTIFY_SOFT_DIRTY, + MMU_NOTIFY_RELEASE, }; #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) @@ -222,6 +228,26 @@ struct mmu_notifier { unsigned int users; }; +/** + * struct mmu_interval_notifier_ops + * @invalidate: Upon return the caller must stop using any SPTEs within this + * range. This function can sleep. Return false only if sleeping + * was required but mmu_notifier_range_blockable(range) is false. + */ +struct mmu_interval_notifier_ops { + bool (*invalidate)(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); +}; + +struct mmu_interval_notifier { + struct interval_tree_node interval_tree; + const struct mmu_interval_notifier_ops *ops; + struct mm_struct *mm; + struct hlist_node deferred_item; + unsigned long invalidate_seq; +}; + #ifdef CONFIG_MMU_NOTIFIER #ifdef CONFIG_LOCKDEP @@ -263,6 +289,81 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm); + +unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *mni); +int mmu_interval_notifier_insert(struct mmu_interval_notifier *mni, + struct mm_struct *mm, unsigned long start, + unsigned long length, + const struct mmu_interval_notifier_ops *ops); +int mmu_interval_notifier_insert_locked( + struct mmu_interval_notifier *mni, struct mm_struct *mm, + unsigned long start, unsigned long length, + const struct mmu_interval_notifier_ops *ops); +void mmu_interval_notifier_remove(struct mmu_interval_notifier *mni); + +/** + * mmu_interval_set_seq - Save the invalidation sequence + * @mni - The mni passed to invalidate + * @cur_seq - The cur_seq passed to the invalidate() callback + * + * This must be called unconditionally from the invalidate callback of a + * struct mmu_interval_notifier_ops under the same lock that is used to call + * mmu_interval_read_retry(). It updates the sequence number for later use by + * mmu_interval_read_retry(). The provided cur_seq will always be odd. + * + * If the caller does not call mmu_interval_read_begin() or + * mmu_interval_read_retry() then this call is not required. + */ +static inline void mmu_interval_set_seq(struct mmu_interval_notifier *mni, + unsigned long cur_seq) +{ + WRITE_ONCE(mni->invalidate_seq, cur_seq); +} + +/** + * mmu_interval_read_retry - End a read side critical section against a VA range + * mni: The range + * seq: The return of the paired mmu_interval_read_begin() + * + * This MUST be called under a user provided lock that is also held + * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). + * + * Each call should be paired with a single mmu_interval_read_begin() and + * should be used to conclude the read side. + * + * Returns true if an invalidation collided with this critical section, and + * the caller should retry. + */ +static inline bool mmu_interval_read_retry(struct mmu_interval_notifier *mni, + unsigned long seq) +{ + return mni->invalidate_seq != seq; +} + +/** + * mmu_interval_check_retry - Test if a collision has occurred + * mni: The range + * seq: The return of the matching mmu_interval_read_begin() + * + * This can be used in the critical section between mmu_interval_read_begin() + * and mmu_interval_read_retry(). A return of true indicates an invalidation + * has collided with this critical region and a future + * mmu_interval_read_retry() will return true. + * + * False is not reliable and only suggests a collision may not have + * occured. It can be called many times and does not have to hold the user + * provided lock. + * + * This call can be used as part of loops and other expensive operations to + * expedite a retry. + */ +static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *mni, + unsigned long seq) +{ + /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ + return READ_ONCE(mni->invalidate_seq) != seq; +} + extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, -- cgit v1.2.3 From 04ec32fbc2b29a640d67872d2f88daac4c73e45b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:20 -0400 Subject: mm/hmm: allow hmm_range to be used with a mmu_interval_notifier or hmm_mirror MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hmm_mirror's handling of ranges does not use a sequence count which results in this bug: CPU0 CPU1 hmm_range_wait_until_valid(range) valid == true hmm_range_fault(range) hmm_invalidate_range_start() range->valid = false hmm_invalidate_range_end() range->valid = true hmm_range_valid(range) valid == true Where the hmm_range_valid() should not have succeeded. Adding the required sequence count would make it nearly identical to the new mmu_interval_notifier. Instead replace the hmm_mirror stuff with mmu_interval_notifier. Co-existence of the two APIs is the first step. Link: https://lore.kernel.org/r/20191112202231.3856-4-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Philip Yang Tested-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 3fec513b9c00..fbb35c78637e 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -145,6 +145,9 @@ enum hmm_pfn_value_e { /* * struct hmm_range - track invalidation lock on virtual address range * + * @notifier: an optional mmu_interval_notifier + * @notifier_seq: when notifier is used this is the result of + * mmu_interval_read_begin() * @hmm: the core HMM structure this range is active against * @vma: the vm area struct for the range * @list: all range lock are on a list @@ -159,6 +162,8 @@ enum hmm_pfn_value_e { * @valid: pfns array did not change since it has been fill by an HMM function */ struct hmm_range { + struct mmu_interval_notifier *notifier; + unsigned long notifier_seq; struct hmm *hmm; struct list_head list; unsigned long start; -- cgit v1.2.3 From 107e899874e95dcddc779142942bf285eba38bc5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:21 -0400 Subject: mm/hmm: define the pre-processor related parts of hmm.h even if disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the function calls are stubbed out with static inlines that always fail. This is the standard way to write a header for an optional component and makes it easier for drivers that only optionally need HMM_MIRROR. Link: https://lore.kernel.org/r/20191112202231.3856-5-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 59 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index fbb35c78637e..cb69bf10dc78 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -62,8 +62,6 @@ #include #include -#ifdef CONFIG_HMM_MIRROR - #include #include #include @@ -374,6 +372,15 @@ struct hmm_mirror { struct list_head list; }; +/* + * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. + */ +#define HMM_FAULT_ALLOW_RETRY (1 << 0) + +/* Don't fault in missing PTEs, just snapshot the current state. */ +#define HMM_FAULT_SNAPSHOT (1 << 1) + +#ifdef CONFIG_HMM_MIRROR int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); void hmm_mirror_unregister(struct hmm_mirror *mirror); @@ -383,14 +390,6 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror); int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); void hmm_range_unregister(struct hmm_range *range); -/* - * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. - */ -#define HMM_FAULT_ALLOW_RETRY (1 << 0) - -/* Don't fault in missing PTEs, just snapshot the current state. */ -#define HMM_FAULT_SNAPSHOT (1 << 1) - long hmm_range_fault(struct hmm_range *range, unsigned int flags); long hmm_range_dma_map(struct hmm_range *range, @@ -401,6 +400,44 @@ long hmm_range_dma_unmap(struct hmm_range *range, struct device *device, dma_addr_t *daddrs, bool dirty); +#else +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) +{ + return -EOPNOTSUPP; +} + +void hmm_mirror_unregister(struct hmm_mirror *mirror) +{ +} + +int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) +{ + return -EOPNOTSUPP; +} + +void hmm_range_unregister(struct hmm_range *range) +{ +} + +static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline long hmm_range_dma_map(struct hmm_range *range, + struct device *device, dma_addr_t *daddrs, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline long hmm_range_dma_unmap(struct hmm_range *range, + struct device *device, + dma_addr_t *daddrs, bool dirty) +{ + return -EOPNOTSUPP; +} +#endif /* * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range @@ -411,6 +448,4 @@ long hmm_range_dma_unmap(struct hmm_range *range, */ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 -#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ - #endif /* LINUX_HMM_H */ -- cgit v1.2.3 From a22dd506400d0f4784ad596f073b9eb5ed7c6a2a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:30 -0400 Subject: mm/hmm: remove hmm_mirror and related MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only two users of this are now converted to use mmu_interval_notifier, delete all the code and update hmm.rst. Link: https://lore.kernel.org/r/20191112202231.3856-14-jgg@ziepe.ca Reviewed-by: Jérôme Glisse Tested-by: Ralph Campbell Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 183 +--------------------------------------------------- 1 file changed, 2 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index cb69bf10dc78..1225b3c87aba 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -68,29 +68,6 @@ #include #include - -/* - * struct hmm - HMM per mm struct - * - * @mm: mm struct this HMM struct is bound to - * @lock: lock protecting ranges list - * @ranges: list of range being snapshotted - * @mirrors: list of mirrors for this mm - * @mmu_notifier: mmu notifier to track updates to CPU page table - * @mirrors_sem: read/write semaphore protecting the mirrors list - * @wq: wait queue for user waiting on a range invalidation - * @notifiers: count of active mmu notifiers - */ -struct hmm { - struct mmu_notifier mmu_notifier; - spinlock_t ranges_lock; - struct list_head ranges; - struct list_head mirrors; - struct rw_semaphore mirrors_sem; - wait_queue_head_t wq; - long notifiers; -}; - /* * hmm_pfn_flag_e - HMM flag enums * @@ -143,9 +120,8 @@ enum hmm_pfn_value_e { /* * struct hmm_range - track invalidation lock on virtual address range * - * @notifier: an optional mmu_interval_notifier - * @notifier_seq: when notifier is used this is the result of - * mmu_interval_read_begin() + * @notifier: a mmu_interval_notifier that includes the start/end + * @notifier_seq: result of mmu_interval_read_begin() * @hmm: the core HMM structure this range is active against * @vma: the vm area struct for the range * @list: all range lock are on a list @@ -162,8 +138,6 @@ enum hmm_pfn_value_e { struct hmm_range { struct mmu_interval_notifier *notifier; unsigned long notifier_seq; - struct hmm *hmm; - struct list_head list; unsigned long start; unsigned long end; uint64_t *pfns; @@ -172,32 +146,8 @@ struct hmm_range { uint64_t default_flags; uint64_t pfn_flags_mask; uint8_t pfn_shift; - bool valid; }; -/* - * hmm_range_wait_until_valid() - wait for range to be valid - * @range: range affected by invalidation to wait on - * @timeout: time out for wait in ms (ie abort wait after that period of time) - * Return: true if the range is valid, false otherwise. - */ -static inline bool hmm_range_wait_until_valid(struct hmm_range *range, - unsigned long timeout) -{ - return wait_event_timeout(range->hmm->wq, range->valid, - msecs_to_jiffies(timeout)) != 0; -} - -/* - * hmm_range_valid() - test if a range is valid or not - * @range: range - * Return: true if the range is valid, false otherwise. - */ -static inline bool hmm_range_valid(struct hmm_range *range) -{ - return range->valid; -} - /* * hmm_device_entry_to_page() - return struct page pointed to by a device entry * @range: range use to decode device entry value @@ -267,111 +217,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, range->flags[HMM_PFN_VALID]; } -/* - * Mirroring: how to synchronize device page table with CPU page table. - * - * A device driver that is participating in HMM mirroring must always - * synchronize with CPU page table updates. For this, device drivers can either - * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device - * drivers can decide to register one mirror per device per process, or just - * one mirror per process for a group of devices. The pattern is: - * - * int device_bind_address_space(..., struct mm_struct *mm, ...) - * { - * struct device_address_space *das; - * - * // Device driver specific initialization, and allocation of das - * // which contains an hmm_mirror struct as one of its fields. - * ... - * - * ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops); - * if (ret) { - * // Cleanup on error - * return ret; - * } - * - * // Other device driver specific initialization - * ... - * } - * - * Once an hmm_mirror is registered for an address space, the device driver - * will get callbacks through sync_cpu_device_pagetables() operation (see - * hmm_mirror_ops struct). - * - * Device driver must not free the struct containing the hmm_mirror struct - * before calling hmm_mirror_unregister(). The expected usage is to do that when - * the device driver is unbinding from an address space. - * - * - * void device_unbind_address_space(struct device_address_space *das) - * { - * // Device driver specific cleanup - * ... - * - * hmm_mirror_unregister(&das->mirror); - * - * // Other device driver specific cleanup, and now das can be freed - * ... - * } - */ - -struct hmm_mirror; - -/* - * struct hmm_mirror_ops - HMM mirror device operations callback - * - * @update: callback to update range on a device - */ -struct hmm_mirror_ops { - /* release() - release hmm_mirror - * - * @mirror: pointer to struct hmm_mirror - * - * This is called when the mm_struct is being released. The callback - * must ensure that all access to any pages obtained from this mirror - * is halted before the callback returns. All future access should - * fault. - */ - void (*release)(struct hmm_mirror *mirror); - - /* sync_cpu_device_pagetables() - synchronize page tables - * - * @mirror: pointer to struct hmm_mirror - * @update: update information (see struct mmu_notifier_range) - * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false - * and callback needs to block, 0 otherwise. - * - * This callback ultimately originates from mmu_notifiers when the CPU - * page table is updated. The device driver must update its page table - * in response to this callback. The update argument tells what action - * to perform. - * - * The device driver must not return from this callback until the device - * page tables are completely updated (TLBs flushed, etc); this is a - * synchronous call. - */ - int (*sync_cpu_device_pagetables)( - struct hmm_mirror *mirror, - const struct mmu_notifier_range *update); -}; - -/* - * struct hmm_mirror - mirror struct for a device driver - * - * @hmm: pointer to struct hmm (which is unique per mm_struct) - * @ops: device driver callback for HMM mirror operations - * @list: for list of mirrors of a given mm - * - * Each address space (mm_struct) being mirrored by a device must register one - * instance of an hmm_mirror struct with HMM. HMM will track the list of all - * mirrors for each mm_struct. - */ -struct hmm_mirror { - struct hmm *hmm; - const struct hmm_mirror_ops *ops; - struct list_head list; -}; - /* * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. */ @@ -381,15 +226,9 @@ struct hmm_mirror { #define HMM_FAULT_SNAPSHOT (1 << 1) #ifdef CONFIG_HMM_MIRROR -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); -void hmm_mirror_unregister(struct hmm_mirror *mirror); - /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror); -void hmm_range_unregister(struct hmm_range *range); - long hmm_range_fault(struct hmm_range *range, unsigned int flags); long hmm_range_dma_map(struct hmm_range *range, @@ -401,24 +240,6 @@ long hmm_range_dma_unmap(struct hmm_range *range, dma_addr_t *daddrs, bool dirty); #else -int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) -{ - return -EOPNOTSUPP; -} - -void hmm_mirror_unregister(struct hmm_mirror *mirror) -{ -} - -int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) -{ - return -EOPNOTSUPP; -} - -void hmm_range_unregister(struct hmm_range *range) -{ -} - static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) { return -EOPNOTSUPP; -- cgit v1.2.3 From 93f4e735b6d98ee4b7a1252d81e815a983e359f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 14:45:28 +0100 Subject: mm/hmm: remove hmm_range_dma_map and hmm_range_dma_unmap These two functions have never been used since they were added. Link: https://lore.kernel.org/r/20191113134528.21187-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 1225b3c87aba..ddf9f7144c43 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -230,34 +230,11 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, * Please see Documentation/vm/hmm.rst for how to use the range API. */ long hmm_range_fault(struct hmm_range *range, unsigned int flags); - -long hmm_range_dma_map(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, - unsigned int flags); -long hmm_range_dma_unmap(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, - bool dirty); #else static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) { return -EOPNOTSUPP; } - -static inline long hmm_range_dma_map(struct hmm_range *range, - struct device *device, dma_addr_t *daddrs, - unsigned int flags) -{ - return -EOPNOTSUPP; -} - -static inline long hmm_range_dma_unmap(struct hmm_range *range, - struct device *device, - dma_addr_t *daddrs, bool dirty) -{ - return -EOPNOTSUPP; -} #endif /* -- cgit v1.2.3 From d46b7e4fb06037a61415f5b6964fcf632ee1dc34 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 21 Nov 2019 00:36:22 +0000 Subject: net: phylink: rename mac_link_state() op to mac_pcs_get_state() Rename the mac_link_state() method to mac_pcs_get_state() to make it clear that it should be returning the MACs PCS current state, which is used for inband negotiation rather than just reading back what the MAC has been configured for. Update the documentation to explicitly mention that this is for inband. We drop the return value as well; most of phylink doesn't check the return value and it is not clear what it should do on error - instead arrange for state->link to be false. Signed-off-by: Russell King Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 300ecdb6790a..fed5488e3c75 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -72,7 +72,7 @@ struct phylink_config { /** * struct phylink_mac_ops - MAC operations structure. * @validate: Validate and update the link configuration. - * @mac_link_state: Read the current link state from the hardware. + * @mac_pcs_get_state: Read the current link state from the hardware. * @mac_config: configure the MAC for the selected mode and state. * @mac_an_restart: restart 802.3z BaseX autonegotiation. * @mac_link_down: take the link down. @@ -84,8 +84,8 @@ struct phylink_mac_ops { void (*validate)(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); - int (*mac_link_state)(struct phylink_config *config, - struct phylink_link_state *state); + void (*mac_pcs_get_state)(struct phylink_config *config, + struct phylink_link_state *state); void (*mac_config)(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state); void (*mac_an_restart)(struct phylink_config *config); @@ -127,18 +127,19 @@ void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); /** - * mac_link_state() - Read the current link state from the hardware + * mac_pcs_get_state() - Read the current inband link state from the hardware * @config: a pointer to a &struct phylink_config. * @state: a pointer to a &struct phylink_link_state. * - * Read the current link state from the MAC, reporting the current - * speed in @state->speed, duplex mode in @state->duplex, pause mode - * in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, - * negotiation completion state in @state->an_complete, and link - * up state in @state->link. + * Read the current inband link state from the MAC PCS, reporting the + * current speed in @state->speed, duplex mode in @state->duplex, pause + * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits, + * negotiation completion state in @state->an_complete, and link up state + * in @state->link. If possible, @state->lp_advertising should also be + * populated. */ -int mac_link_state(struct phylink_config *config, - struct phylink_link_state *state); +void mac_pcs_get_state(struct phylink_config *config, + struct phylink_link_state *state); /** * mac_config() - configure the MAC for the selected mode and state @@ -166,7 +167,7 @@ int mac_link_state(struct phylink_config *config, * 1000base-X or Cisco SGMII mode depending on the @state->interface * mode). In both cases, link state management (whether the link * is up or not) is performed by the MAC, and reported via the - * mac_link_state() callback. Changes in link state must be made + * mac_pcs_get_state() callback. Changes in link state must be made * by calling phylink_mac_change(). * * If in 802.3z mode, the link speed is fixed, dependent on the -- cgit v1.2.3 From 4b3da77b72ad6b3c48c6fe4a395ace7db39a12c5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:54 +0100 Subject: bpf, x86: Generalize and extend bpf_arch_text_poke for direct jumps Add BPF_MOD_{NOP_TO_JUMP,JUMP_TO_JUMP,JUMP_TO_NOP} patching for x86 JIT in order to be able to patch direct jumps or nop them out. We need this facility in order to patch tail call jumps and in later work also BPF static keys. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/aa4784196a8e5e985af4b30a4fe5336bce6e9643.1574452833.git.daniel@iogearbox.net --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e89e86122233..7978b617caa8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1284,10 +1284,16 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ enum bpf_text_poke_type { + /* All call-related pokes. */ BPF_MOD_NOP_TO_CALL, BPF_MOD_CALL_TO_CALL, BPF_MOD_CALL_TO_NOP, + /* All jump-related pokes. */ + BPF_MOD_NOP_TO_JUMP, + BPF_MOD_JUMP_TO_JUMP, + BPF_MOD_JUMP_TO_NOP, }; + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); -- cgit v1.2.3 From 6332be04c039a72fca32ed0a4265bac58d606bb6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:55 +0100 Subject: bpf: Move bpf_free_used_maps into sleepable section We later on are going to need a sleepable context as opposed to plain RCU callback in order to untrack programs we need to poke at runtime and tracking as well as image update is performed under mutex. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/09823b1d5262876e9b83a8e75df04cf0467357a4.1574452833.git.daniel@iogearbox.net --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7978b617caa8..561b920f0bf7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1031,6 +1031,10 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, { return -ENOTSUPP; } + +static inline void bpf_map_put(struct bpf_map *map) +{ +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, -- cgit v1.2.3 From 2beee5f57441413b64a9c2bd657e17beabb98d1c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:56 +0100 Subject: bpf: Move owner type, jited info into array auxiliary data We're going to extend this with further information which is only relevant for prog array at this point. Given this info is not used in critical path, move it into its own structure such that the main array map structure can be kept on diet. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/b9ddccdb0f6f7026489ee955f16c96381e1e7238.1574452833.git.daniel@iogearbox.net --- include/linux/bpf.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 561b920f0bf7..c3b29061284e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -560,17 +560,21 @@ struct bpf_prog_aux { }; }; +struct bpf_array_aux { + /* 'Ownership' of prog array is claimed by the first program that + * is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have + * the same prog type and JITed flag. + */ + enum bpf_prog_type type; + bool jited; +}; + struct bpf_array { struct bpf_map map; u32 elem_size; u32 index_mask; - /* 'ownership' of prog_array is claimed by the first program that - * is going to use this map or by the first program which FD is stored - * in the map to make sure that all callers and callees have the same - * prog_type and JITed flag - */ - enum bpf_prog_type owner_prog_type; - bool owner_jited; + struct bpf_array_aux *aux; union { char value[0] __aligned(8); void *ptrs[0] __aligned(8); -- cgit v1.2.3 From a66886fe6c24ebeeb6dc10fbd9b75158029eacf7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:57 +0100 Subject: bpf: Add initial poke descriptor table for jit images Add initial poke table data structures and management to the BPF prog that can later be used by JITs. Also add an instance of poke specific data for tail call maps; plan for later work is to extend this also for BPF static keys. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1db285ec2ea4207ee0455b3f8e191a4fc58b9ade.1574452833.git.daniel@iogearbox.net --- include/linux/bpf.h | 20 ++++++++++++++++++++ include/linux/filter.h | 10 ++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3b29061284e..312983bf7faa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,6 +488,24 @@ struct bpf_func_info_aux { bool unreliable; }; +enum bpf_jit_poke_reason { + BPF_POKE_REASON_TAIL_CALL, +}; + +/* Descriptor of pokes pointing /into/ the JITed image. */ +struct bpf_jit_poke_descriptor { + void *ip; + union { + struct { + struct bpf_map *map; + u32 key; + } tail_call; + }; + bool ip_stable; + u8 adj_off; + u16 reason; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; @@ -513,6 +531,8 @@ struct bpf_prog_aux { const char *attach_func_name; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ + struct bpf_jit_poke_descriptor *poke_tab; + u32 size_poke_tab; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_prog_ops *ops; diff --git a/include/linux/filter.h b/include/linux/filter.h index ad80e9c6111c..796b60d8cc6c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -952,6 +952,9 @@ void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, + struct bpf_jit_poke_descriptor *poke); + int bpf_jit_get_func_addr(const struct bpf_prog *prog, const struct bpf_insn *insn, bool extra_pass, u64 *func_addr, bool *func_addr_fixed); @@ -1055,6 +1058,13 @@ static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) return false; } +static inline int +bpf_jit_add_poke_descriptor(struct bpf_prog *prog, + struct bpf_jit_poke_descriptor *poke) +{ + return -ENOTSUPP; +} + static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); -- cgit v1.2.3 From da765a2f599304a81a25e77908d1790414ecdbb6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:58 +0100 Subject: bpf: Add poke dependency tracking for prog array maps This work adds program tracking to prog array maps. This is needed such that upon prog array updates/deletions we can fix up all programs which make use of this tail call map. We add ops->map_poke_{un,}track() helpers to maps to maintain the list of programs and ops->map_poke_run() for triggering the actual update. bpf_array_aux is extended to contain the list head and poke_mutex in order to serialize program patching during updates/deletions. bpf_free_used_maps() will untrack the program shortly before dropping the reference to the map. For clearing out the prog array once all urefs are dropped we need to use schedule_work() to have a sleepable context. The prog_array_map_poke_run() is triggered during updates/deletions and walks the maintained prog list. It checks in their poke_tabs whether the map and key is matching and runs the actual bpf_arch_text_poke() for patching in the nop or new jmp location. Depending on the type of update, we use one of BPF_MOD_{NOP_TO_JUMP,JUMP_TO_NOP,JUMP_TO_JUMP}. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1fb364bb3c565b3e415d5ea348f036ff379e779d.1574452833.git.daniel@iogearbox.net --- include/linux/bpf.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 312983bf7faa..c2f07fd410c1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,6 +22,7 @@ struct bpf_verifier_env; struct bpf_verifier_log; struct perf_event; struct bpf_prog; +struct bpf_prog_aux; struct bpf_map; struct sock; struct seq_file; @@ -64,6 +65,12 @@ struct bpf_map_ops { const struct btf_type *key_type, const struct btf_type *value_type); + /* Prog poke tracking helpers. */ + int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux); + void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux); + void (*map_poke_run)(struct bpf_map *map, u32 key, struct bpf_prog *old, + struct bpf_prog *new); + /* Direct value access helpers. */ int (*map_direct_value_addr)(const struct bpf_map *map, u64 *imm, u32 off); @@ -588,6 +595,11 @@ struct bpf_array_aux { */ enum bpf_prog_type type; bool jited; + /* Programs with direct jumps into programs part of this array. */ + struct list_head poke_progs; + struct bpf_map *map; + struct mutex poke_mutex; + struct work_struct work; }; struct bpf_array { -- cgit v1.2.3 From d2e4c1e6c2947269346054ac8937ccfe9e0bcc6b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 22 Nov 2019 21:07:59 +0100 Subject: bpf: Constant map key tracking for prog array pokes Add tracking of constant keys into tail call maps. The signature of bpf_tail_call_proto is that arg1 is ctx, arg2 map pointer and arg3 is a index key. The direct call approach for tail calls can be enabled if the verifier asserted that for all branches leading to the tail call helper invocation, the map pointer and index key were both constant and the same. Tracking of map pointers we already do from prior work via c93552c443eb ("bpf: properly enforce index mask to prevent out-of-bounds speculation") and 09772d92cd5a ("bpf: avoid retpoline for lookup/update/ delete calls on maps"). Given the tail call map index key is not on stack but directly in the register, we can add similar tracking approach and later in fixup_bpf_calls() add a poke descriptor to the progs poke_tab with the relevant information for the JITing phase. We internally reuse insn->imm for the rewritten BPF_JMP | BPF_TAIL_CALL instruction in order to point into the prog's poke_tab, and keep insn->imm as 0 as indicator that current indirect tail call emission must be used. Note that publishing to the tracker must happen at the end of fixup_bpf_calls() since adding elements to the poke_tab reallocates its memory, so we need to wait until its in final state. Future work can generalize and add similar approach to optimize plain array map lookups. Difference there is that we need to look into the key value that sits on stack. For clarity in bpf_insn_aux_data, map_state has been renamed into map_ptr_state, so we get map_{ptr,key}_state as trackers. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/e8db37f6b2ae60402fa40216c96738ee9b316c32.1574452833.git.daniel@iogearbox.net --- include/linux/bpf_verifier.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cdd08bf0ec06..26e40de9ef55 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -293,7 +293,7 @@ struct bpf_verifier_state_list { struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ - unsigned long map_state; /* pointer/poison value for maps */ + unsigned long map_ptr_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ u32 alu_limit; /* limit for add/sub register with pointer */ struct { @@ -301,6 +301,7 @@ struct bpf_insn_aux_data { u32 map_off; /* offset from value base address */ }; }; + u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ -- cgit v1.2.3 From b8cd76ca4ae34731d47cd6a876d912a08efcc240 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Nov 2019 21:37:31 +0100 Subject: bpf: Add bpf_jit_blinding_enabled for !CONFIG_BPF_JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a definition of bpf_jit_blinding_enabled() when CONFIG_BPF_JIT is not set in order to fix a recent build regression: [...] CC kernel/bpf/verifier.o CC kernel/bpf/inode.o kernel/bpf/verifier.c: In function ‘fixup_bpf_calls’: kernel/bpf/verifier.c:9132:25: error: implicit declaration of function ‘bpf_jit_blinding_enabled’; did you mean ‘bpf_jit_kallsyms_enabled’? [-Werror=implicit-function-declaration] 9132 | bool expect_blinding = bpf_jit_blinding_enabled(prog); | ^~~~~~~~~~~~~~~~~~~~~~~~ | bpf_jit_kallsyms_enabled CC kernel/bpf/helpers.o CC kernel/bpf/hashtab.o [...] Fixes: d2e4c1e6c294 ("bpf: Constant map key tracking for prog array pokes") Reported-by: Jakub Sitnicki Reported-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/40baf8f3507cac4851a310578edfb98ce73b5605.1574541375.git.daniel@iogearbox.net --- include/linux/filter.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 796b60d8cc6c..1b1e8b8f88da 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1053,6 +1053,11 @@ static inline bool ebpf_jit_enabled(void) return false; } +static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) +{ + return false; +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; -- cgit v1.2.3 From b553a6ec570044fc1ae300c6fb24f9ce204c5894 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 24 Nov 2019 01:39:42 +0100 Subject: bpf: Simplify __bpf_arch_text_poke poke type handling Given that we have BPF_MOD_NOP_TO_{CALL,JUMP}, BPF_MOD_{CALL,JUMP}_TO_NOP and BPF_MOD_{CALL,JUMP}_TO_{CALL,JUMP} poke types and that we also pass in old_addr as well as new_addr, it's a bit redundant and unnecessarily complicates __bpf_arch_text_poke() itself since we can derive the same from the *_addr that were passed in. Hence simplify and use BPF_MOD_{CALL,JUMP} as types which also allows to clean up call-sites. In addition to that, __bpf_arch_text_poke() currently verifies that text matches expected old_insn before we invoke text_poke_bp(). Also add a check on new_insn and skip rewrite if it already matches. Reason why this is rather useful is that it avoids making any special casing in prog_array_map_poke_run() when old and new prog were NULL and has the benefit that also for this case we perform a check on text whether it really matches our expectations. Suggested-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/fcb00a2b0b288d6c73de4ef58116a821c8fe8f2f.1574555798.git.daniel@iogearbox.net --- include/linux/bpf.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2f07fd410c1..35903f148be5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1324,14 +1324,8 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type, #endif /* CONFIG_INET */ enum bpf_text_poke_type { - /* All call-related pokes. */ - BPF_MOD_NOP_TO_CALL, - BPF_MOD_CALL_TO_CALL, - BPF_MOD_CALL_TO_NOP, - /* All jump-related pokes. */ - BPF_MOD_NOP_TO_JUMP, - BPF_MOD_JUMP_TO_JUMP, - BPF_MOD_JUMP_TO_NOP, + BPF_MOD_CALL, + BPF_MOD_JUMP, }; int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, -- cgit v1.2.3 From 23e6b169c9917fbd77534f8c5f378cb073f548bd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:53 +0000 Subject: locking/refcount: Define constants for saturation and max refcount values The REFCOUNT_FULL implementation uses a different saturation point than the x86 implementation, which means that the shared refcount code in lib/refcount.c (e.g. refcount_dec_not_one()) needs to be aware of the difference. Rather than duplicate the definitions from the lkdtm driver, instead move them into and update all references accordingly. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-2-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index e28cce21bad6..79f62e8d2256 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -4,6 +4,7 @@ #include #include +#include #include struct mutex; @@ -12,7 +13,7 @@ struct mutex; * struct refcount_t - variant of atomic_t specialized for reference counts * @refs: atomic_t counter field * - * The counter saturates at UINT_MAX and will not move once + * The counter saturates at REFCOUNT_SATURATED and will not move once * there. This avoids wrapping the counter and causing 'spurious' * use-after-free bugs. */ @@ -56,6 +57,9 @@ extern void refcount_dec_checked(refcount_t *r); #ifdef CONFIG_REFCOUNT_FULL +#define REFCOUNT_MAX (UINT_MAX - 1) +#define REFCOUNT_SATURATED UINT_MAX + #define refcount_add_not_zero refcount_add_not_zero_checked #define refcount_add refcount_add_checked @@ -68,6 +72,10 @@ extern void refcount_dec_checked(refcount_t *r); #define refcount_dec refcount_dec_checked #else + +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) + # ifdef CONFIG_ARCH_HAS_REFCOUNT # include # else -- cgit v1.2.3 From 97a1420adf0cdf0cf6f41bab0b2acf658c96b94b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:54 +0000 Subject: locking/refcount: Ensure integer operands are treated as signed In preparation for changing the saturation point of REFCOUNT_FULL to INT_MIN/2, change the type of integer operands passed into the API from 'unsigned int' to 'int' so that we can avoid casting during comparisons when we don't want to fall foul of C integral conversion rules for signed and unsigned types. Since the kernel is compiled with '-fno-strict-overflow', we don't need to worry about the UB introduced by signed overflow here. Furthermore, we're already making heavy use of the atomic_t API, which operates exclusively on signed types. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-3-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 79f62e8d2256..89066a1471dd 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -28,7 +28,7 @@ typedef struct refcount_struct { * @r: the refcount * @n: value to which the refcount will be set */ -static inline void refcount_set(refcount_t *r, unsigned int n) +static inline void refcount_set(refcount_t *r, int n) { atomic_set(&r->refs, n); } @@ -44,13 +44,13 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); -extern void refcount_add_checked(unsigned int i, refcount_t *r); +extern __must_check bool refcount_add_not_zero_checked(int i, refcount_t *r); +extern void refcount_add_checked(int i, refcount_t *r); extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); extern void refcount_inc_checked(refcount_t *r); -extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); +extern __must_check bool refcount_sub_and_test_checked(int i, refcount_t *r); extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); extern void refcount_dec_checked(refcount_t *r); @@ -79,12 +79,12 @@ extern void refcount_dec_checked(refcount_t *r); # ifdef CONFIG_ARCH_HAS_REFCOUNT # include # else -static inline __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r) +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) { return atomic_add_unless(&r->refs, i, 0); } -static inline void refcount_add(unsigned int i, refcount_t *r) +static inline void refcount_add(int i, refcount_t *r) { atomic_add(i, &r->refs); } @@ -99,7 +99,7 @@ static inline void refcount_inc(refcount_t *r) atomic_inc(&r->refs); } -static inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { return atomic_sub_and_test(i, &r->refs); } -- cgit v1.2.3 From 7221762c48c6bbbcc6cc51d8b803c06930215e34 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:55 +0000 Subject: locking/refcount: Remove unused refcount_*_checked() variants The full-fat refcount implementation is exposed via a set of functions suffixed with "_checked()", the idea being that code can choose to use the more expensive, yet more secure implementation on a case-by-case basis. In reality, this hasn't happened, so with a grand total of zero users, let's remove the checked variants for now by simply dropping the suffix and predicating the out-of-line functions on CONFIG_REFCOUNT_FULL=y. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-4-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 89066a1471dd..edd505d1a23b 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -44,32 +44,21 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -extern __must_check bool refcount_add_not_zero_checked(int i, refcount_t *r); -extern void refcount_add_checked(int i, refcount_t *r); - -extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); -extern void refcount_inc_checked(refcount_t *r); - -extern __must_check bool refcount_sub_and_test_checked(int i, refcount_t *r); - -extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); -extern void refcount_dec_checked(refcount_t *r); - #ifdef CONFIG_REFCOUNT_FULL #define REFCOUNT_MAX (UINT_MAX - 1) #define REFCOUNT_SATURATED UINT_MAX -#define refcount_add_not_zero refcount_add_not_zero_checked -#define refcount_add refcount_add_checked +extern __must_check bool refcount_add_not_zero(int i, refcount_t *r); +extern void refcount_add(int i, refcount_t *r); -#define refcount_inc_not_zero refcount_inc_not_zero_checked -#define refcount_inc refcount_inc_checked +extern __must_check bool refcount_inc_not_zero(refcount_t *r); +extern void refcount_inc(refcount_t *r); -#define refcount_sub_and_test refcount_sub_and_test_checked +extern __must_check bool refcount_sub_and_test(int i, refcount_t *r); -#define refcount_dec_and_test refcount_dec_and_test_checked -#define refcount_dec refcount_dec_checked +extern __must_check bool refcount_dec_and_test(refcount_t *r); +extern void refcount_dec(refcount_t *r); #else -- cgit v1.2.3 From 77e9971c79c29542ab7dd4140f9343bf2ff36158 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:56 +0000 Subject: locking/refcount: Move the bulk of the REFCOUNT_FULL implementation into the header In an effort to improve performance of the REFCOUNT_FULL implementation, move the bulk of its functions into linux/refcount.h. This allows them to be inlined in the same way as if they had been provided via CONFIG_ARCH_HAS_REFCOUNT. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-5-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 237 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 228 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index edd505d1a23b..e719b5b1220e 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -45,22 +45,241 @@ static inline unsigned int refcount_read(const refcount_t *r) } #ifdef CONFIG_REFCOUNT_FULL +#include #define REFCOUNT_MAX (UINT_MAX - 1) #define REFCOUNT_SATURATED UINT_MAX -extern __must_check bool refcount_add_not_zero(int i, refcount_t *r); -extern void refcount_add(int i, refcount_t *r); +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at REFCOUNT_SATURATED and will not + * move once there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. + * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + * The decrements dec_and_test() and sub_and_test() also provide acquire + * ordering on success. + * + */ + +/** + * refcount_add_not_zero - add a value to a refcount unless it is 0 + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) +{ + unsigned int new, val = atomic_read(&r->refs); + + do { + if (!val) + return false; + + if (unlikely(val == REFCOUNT_SATURATED)) + return true; + + new = val + i; + if (new < val) + new = REFCOUNT_SATURATED; + + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); + + WARN_ONCE(new == REFCOUNT_SATURATED, + "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/** + * refcount_add - add a value to a refcount + * @i: the value to add to the refcount + * @r: the refcount + * + * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc(), or one of its variants, should instead be used to + * increment a reference count. + */ +static inline void refcount_add(int i, refcount_t *r) +{ + WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); +} + +/** + * refcount_inc_not_zero - increment a refcount unless it is 0 + * @r: the refcount to increment + * + * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED + * and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise + */ +static inline __must_check bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int new, val = atomic_read(&r->refs); + + do { + new = val + 1; -extern __must_check bool refcount_inc_not_zero(refcount_t *r); -extern void refcount_inc(refcount_t *r); + if (!val) + return false; -extern __must_check bool refcount_sub_and_test(int i, refcount_t *r); + if (unlikely(!new)) + return true; -extern __must_check bool refcount_dec_and_test(refcount_t *r); -extern void refcount_dec(refcount_t *r); + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); + + WARN_ONCE(new == REFCOUNT_SATURATED, + "refcount_t: saturated; leaking memory.\n"); + + return true; +} + +/** + * refcount_inc - increment a refcount + * @r: the refcount to increment + * + * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object. + * + * Will WARN if the refcount is 0, as this represents a possible use-after-free + * condition. + */ +static inline void refcount_inc(refcount_t *r) +{ + WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} + +/** + * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * @i: amount to subtract from the refcount + * @r: the refcount + * + * Similar to atomic_dec_and_test(), but it will WARN, return false and + * ultimately leak on underflow and will fail to decrement when saturated + * at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_dec(), or one of its variants, should instead be used to + * decrement a reference count. + * + * Return: true if the resulting refcount is 0, false otherwise + */ +static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) +{ + unsigned int new, val = atomic_read(&r->refs); + + do { + if (unlikely(val == REFCOUNT_SATURATED)) + return false; + + new = val - i; + if (new > val) { + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); + + if (!new) { + smp_acquire__after_ctrl_dep(); + return true; + } + return false; + +} + +/** + * refcount_dec_and_test - decrement a refcount and test if it is 0 + * @r: the refcount + * + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides an acquire ordering on success such that free() + * must come after. + * + * Return: true if the resulting refcount is 0, false otherwise + */ +static inline __must_check bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} + +/** + * refcount_dec - decrement a refcount + * @r: the refcount + * + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at REFCOUNT_SATURATED. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ +static inline void refcount_dec(refcount_t *r) +{ + WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); +} -#else +#else /* CONFIG_REFCOUNT_FULL */ #define REFCOUNT_MAX INT_MAX #define REFCOUNT_SATURATED (INT_MIN / 2) @@ -103,7 +322,7 @@ static inline void refcount_dec(refcount_t *r) atomic_dec(&r->refs); } # endif /* !CONFIG_ARCH_HAS_REFCOUNT */ -#endif /* CONFIG_REFCOUNT_FULL */ +#endif /* !CONFIG_REFCOUNT_FULL */ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); -- cgit v1.2.3 From dcb786493f3e48da3272b710028d42ec608cfda1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:57 +0000 Subject: locking/refcount: Improve performance of generic REFCOUNT_FULL code Rewrite the generic REFCOUNT_FULL implementation so that the saturation point is moved to INT_MIN / 2. This allows us to defer the sanity checks until after the atomic operation, which removes many uses of cmpxchg() in favour of atomic_fetch_{add,sub}(). Some crude perf results obtained from lkdtm show substantially less overhead, despite the checking: $ perf stat -r 3 -B -- echo {ATOMIC,REFCOUNT}_TIMING >/sys/kernel/debug/provoke-crash/DIRECT # arm64 ATOMIC_TIMING: 46.50451 +- 0.00134 seconds time elapsed ( +- 0.00% ) REFCOUNT_TIMING (REFCOUNT_FULL, mainline): 77.57522 +- 0.00982 seconds time elapsed ( +- 0.01% ) REFCOUNT_TIMING (REFCOUNT_FULL, this series): 48.7181 +- 0.0256 seconds time elapsed ( +- 0.05% ) # x86 ATOMIC_TIMING: 31.6225 +- 0.0776 seconds time elapsed ( +- 0.25% ) REFCOUNT_TIMING (!REFCOUNT_FULL, mainline/x86 asm): 31.6689 +- 0.0901 seconds time elapsed ( +- 0.28% ) REFCOUNT_TIMING (REFCOUNT_FULL, mainline): 53.203 +- 0.138 seconds time elapsed ( +- 0.26% ) REFCOUNT_TIMING (REFCOUNT_FULL, this series): 31.7408 +- 0.0486 seconds time elapsed ( +- 0.15% ) Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Tested-by: Jan Glauber Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-6-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 131 +++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index e719b5b1220e..e3b218d669ce 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -47,8 +47,8 @@ static inline unsigned int refcount_read(const refcount_t *r) #ifdef CONFIG_REFCOUNT_FULL #include -#define REFCOUNT_MAX (UINT_MAX - 1) -#define REFCOUNT_SATURATED UINT_MAX +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) /* * Variant of atomic_t specialized for reference counts. @@ -56,9 +56,47 @@ static inline unsigned int refcount_read(const refcount_t *r) * The interface matches the atomic_t interface (to aid in porting) but only * provides the few functions one should use for reference counting. * - * It differs in that the counter saturates at REFCOUNT_SATURATED and will not - * move once there. This avoids wrapping the counter and causing 'spurious' - * use-after-free issues. + * Saturation semantics + * ==================== + * + * refcount_t differs from atomic_t in that the counter saturates at + * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the + * counter and causing 'spurious' use-after-free issues. In order to avoid the + * cost associated with introducing cmpxchg() loops into all of the saturating + * operations, we temporarily allow the counter to take on an unchecked value + * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow + * or overflow has occurred. Although this is racy when multiple threads + * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly + * equidistant from 0 and INT_MAX we minimise the scope for error: + * + * INT_MAX REFCOUNT_SATURATED UINT_MAX + * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) + * +--------------------------------+----------------+----------------+ + * <---------- bad value! ----------> + * + * (in a signed view of the world, the "bad value" range corresponds to + * a negative counter value). + * + * As an example, consider a refcount_inc() operation that causes the counter + * to overflow: + * + * int old = atomic_fetch_add_relaxed(r); + * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) + * if (old < 0) + * atomic_set(r, REFCOUNT_SATURATED); + * + * If another thread also performs a refcount_inc() operation between the two + * atomic operations, then the count will continue to edge closer to 0. If it + * reaches a value of 1 before /any/ of the threads reset it to the saturated + * value, then a concurrent refcount_dec_and_test() may erroneously free the + * underlying object. Given the precise timing details involved with the + * round-robin scheduling of each thread manipulating the refcount and the need + * to hit the race multiple times in succession, there doesn't appear to be a + * practical avenue of attack even if using refcount_add() operations with + * larger increments. + * + * Memory ordering + * =============== * * Memory ordering rules are slightly relaxed wrt regular atomic_t functions * and provide only what is strictly required for refcounts. @@ -109,25 +147,19 @@ static inline unsigned int refcount_read(const refcount_t *r) */ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) { - unsigned int new, val = atomic_read(&r->refs); + int old = refcount_read(r); do { - if (!val) - return false; - - if (unlikely(val == REFCOUNT_SATURATED)) - return true; - - new = val + i; - if (new < val) - new = REFCOUNT_SATURATED; + if (!old) + break; + } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); - } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); - - WARN_ONCE(new == REFCOUNT_SATURATED, - "refcount_t: saturated; leaking memory.\n"); + if (unlikely(old < 0 || old + i < 0)) { + refcount_set(r, REFCOUNT_SATURATED); + WARN_ONCE(1, "refcount_t: saturated; leaking memory.\n"); + } - return true; + return old; } /** @@ -148,7 +180,13 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) */ static inline void refcount_add(int i, refcount_t *r) { - WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + int old = atomic_fetch_add_relaxed(i, &r->refs); + + WARN_ONCE(!old, "refcount_t: addition on 0; use-after-free.\n"); + if (unlikely(old <= 0 || old + i <= 0)) { + refcount_set(r, REFCOUNT_SATURATED); + WARN_ONCE(old, "refcount_t: saturated; leaking memory.\n"); + } } /** @@ -166,23 +204,7 @@ static inline void refcount_add(int i, refcount_t *r) */ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) { - unsigned int new, val = atomic_read(&r->refs); - - do { - new = val + 1; - - if (!val) - return false; - - if (unlikely(!new)) - return true; - - } while (!atomic_try_cmpxchg_relaxed(&r->refs, &val, new)); - - WARN_ONCE(new == REFCOUNT_SATURATED, - "refcount_t: saturated; leaking memory.\n"); - - return true; + return refcount_add_not_zero(1, r); } /** @@ -199,7 +221,7 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) */ static inline void refcount_inc(refcount_t *r) { - WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + refcount_add(1, r); } /** @@ -224,26 +246,19 @@ static inline void refcount_inc(refcount_t *r) */ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) { - unsigned int new, val = atomic_read(&r->refs); - - do { - if (unlikely(val == REFCOUNT_SATURATED)) - return false; + int old = atomic_fetch_sub_release(i, &r->refs); - new = val - i; - if (new > val) { - WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); - return false; - } - - } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); - - if (!new) { + if (old == i) { smp_acquire__after_ctrl_dep(); return true; } - return false; + if (unlikely(old < 0 || old - i < 0)) { + refcount_set(r, REFCOUNT_SATURATED); + WARN_ONCE(1, "refcount_t: underflow; use-after-free.\n"); + } + + return false; } /** @@ -276,9 +291,13 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r) */ static inline void refcount_dec(refcount_t *r) { - WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); -} + int old = atomic_fetch_sub_release(1, &r->refs); + if (unlikely(old <= 1)) { + refcount_set(r, REFCOUNT_SATURATED); + WARN_ONCE(1, "refcount_t: decrement hit 0; leaking memory.\n"); + } +} #else /* CONFIG_REFCOUNT_FULL */ #define REFCOUNT_MAX INT_MAX -- cgit v1.2.3 From 1eb085d94256aaa69b00cf5a86e3c5f5bb2bc460 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:58 +0000 Subject: locking/refcount: Move saturation warnings out of line Having the refcount saturation and warnings inline bloats the text, despite the fact that these paths should never be executed in normal operation. Move the refcount saturation and warnings out of line to reduce the image size when refcount_t checking is enabled. Relative to an x86_64 defconfig, the sizes reported by bloat-o-meter are: # defconfig+REFCOUNT_FULL, inline saturation (i.e. before this patch) Total: Before=14762076, After=14915442, chg +1.04% # defconfig+REFCOUNT_FULL, out-of-line saturation (i.e. after this patch) Total: Before=14762076, After=14835497, chg +0.50% A side-effect of this change is that we now only get one warning per refcount saturation type, rather than one per problematic call-site. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-7-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index e3b218d669ce..1cd0a876a789 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -23,6 +23,16 @@ typedef struct refcount_struct { #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +enum refcount_saturation_type { + REFCOUNT_ADD_NOT_ZERO_OVF, + REFCOUNT_ADD_OVF, + REFCOUNT_ADD_UAF, + REFCOUNT_SUB_UAF, + REFCOUNT_DEC_LEAK, +}; + +void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); + /** * refcount_set - set a refcount's value * @r: the refcount @@ -154,10 +164,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) break; } while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i)); - if (unlikely(old < 0 || old + i < 0)) { - refcount_set(r, REFCOUNT_SATURATED); - WARN_ONCE(1, "refcount_t: saturated; leaking memory.\n"); - } + if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); return old; } @@ -182,11 +190,10 @@ static inline void refcount_add(int i, refcount_t *r) { int old = atomic_fetch_add_relaxed(i, &r->refs); - WARN_ONCE(!old, "refcount_t: addition on 0; use-after-free.\n"); - if (unlikely(old <= 0 || old + i <= 0)) { - refcount_set(r, REFCOUNT_SATURATED); - WARN_ONCE(old, "refcount_t: saturated; leaking memory.\n"); - } + if (unlikely(!old)) + refcount_warn_saturate(r, REFCOUNT_ADD_UAF); + else if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_OVF); } /** @@ -253,10 +260,8 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) return true; } - if (unlikely(old < 0 || old - i < 0)) { - refcount_set(r, REFCOUNT_SATURATED); - WARN_ONCE(1, "refcount_t: underflow; use-after-free.\n"); - } + if (unlikely(old < 0 || old - i < 0)) + refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; } @@ -291,12 +296,8 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r) */ static inline void refcount_dec(refcount_t *r) { - int old = atomic_fetch_sub_release(1, &r->refs); - - if (unlikely(old <= 1)) { - refcount_set(r, REFCOUNT_SATURATED); - WARN_ONCE(1, "refcount_t: decrement hit 0; leaking memory.\n"); - } + if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1)) + refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } #else /* CONFIG_REFCOUNT_FULL */ -- cgit v1.2.3 From 65b008552469f1c37f5e06e0016924502e40b4f5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:58:59 +0000 Subject: locking/refcount: Consolidate REFCOUNT_{MAX,SATURATED} definitions The definitions of REFCOUNT_MAX and REFCOUNT_SATURATED are the same, regardless of CONFIG_REFCOUNT_FULL, so consolidate them into a single pair of definitions. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-8-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 1cd0a876a789..757d4630115c 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -22,6 +22,8 @@ typedef struct refcount_struct { } refcount_t; #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) enum refcount_saturation_type { REFCOUNT_ADD_NOT_ZERO_OVF, @@ -57,9 +59,6 @@ static inline unsigned int refcount_read(const refcount_t *r) #ifdef CONFIG_REFCOUNT_FULL #include -#define REFCOUNT_MAX INT_MAX -#define REFCOUNT_SATURATED (INT_MIN / 2) - /* * Variant of atomic_t specialized for reference counts. * @@ -300,10 +299,6 @@ static inline void refcount_dec(refcount_t *r) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } #else /* CONFIG_REFCOUNT_FULL */ - -#define REFCOUNT_MAX INT_MAX -#define REFCOUNT_SATURATED (INT_MIN / 2) - # ifdef CONFIG_ARCH_HAS_REFCOUNT # include # else -- cgit v1.2.3 From fb041bb7c0a918b95c6889fc965cdc4a75b4c0ca Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:59:00 +0000 Subject: locking/refcount: Consolidate implementations of refcount_t The generic implementation of refcount_t should be good enough for everybody, so remove ARCH_HAS_REFCOUNT and REFCOUNT_FULL entirely, leaving the generic implementation enabled unconditionally. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Acked-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-9-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 158 +++++++++++++++++------------------------------ 1 file changed, 58 insertions(+), 100 deletions(-) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 757d4630115c..0ac50cf62d06 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -1,64 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_REFCOUNT_H -#define _LINUX_REFCOUNT_H - -#include -#include -#include -#include - -struct mutex; - -/** - * struct refcount_t - variant of atomic_t specialized for reference counts - * @refs: atomic_t counter field - * - * The counter saturates at REFCOUNT_SATURATED and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free bugs. - */ -typedef struct refcount_struct { - atomic_t refs; -} refcount_t; - -#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } -#define REFCOUNT_MAX INT_MAX -#define REFCOUNT_SATURATED (INT_MIN / 2) - -enum refcount_saturation_type { - REFCOUNT_ADD_NOT_ZERO_OVF, - REFCOUNT_ADD_OVF, - REFCOUNT_ADD_UAF, - REFCOUNT_SUB_UAF, - REFCOUNT_DEC_LEAK, -}; - -void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); - -/** - * refcount_set - set a refcount's value - * @r: the refcount - * @n: value to which the refcount will be set - */ -static inline void refcount_set(refcount_t *r, int n) -{ - atomic_set(&r->refs, n); -} - -/** - * refcount_read - get a refcount's value - * @r: the refcount - * - * Return: the refcount's value - */ -static inline unsigned int refcount_read(const refcount_t *r) -{ - return atomic_read(&r->refs); -} - -#ifdef CONFIG_REFCOUNT_FULL -#include - /* * Variant of atomic_t specialized for reference counts. * @@ -136,6 +76,64 @@ static inline unsigned int refcount_read(const refcount_t *r) * */ +#ifndef _LINUX_REFCOUNT_H +#define _LINUX_REFCOUNT_H + +#include +#include +#include +#include +#include + +struct mutex; + +/** + * struct refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * + * The counter saturates at REFCOUNT_SATURATED and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. + */ +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) + +enum refcount_saturation_type { + REFCOUNT_ADD_NOT_ZERO_OVF, + REFCOUNT_ADD_OVF, + REFCOUNT_ADD_UAF, + REFCOUNT_SUB_UAF, + REFCOUNT_DEC_LEAK, +}; + +void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); + +/** + * refcount_set - set a refcount's value + * @r: the refcount + * @n: value to which the refcount will be set + */ +static inline void refcount_set(refcount_t *r, int n) +{ + atomic_set(&r->refs, n); +} + +/** + * refcount_read - get a refcount's value + * @r: the refcount + * + * Return: the refcount's value + */ +static inline unsigned int refcount_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + /** * refcount_add_not_zero - add a value to a refcount unless it is 0 * @i: the value to add to the refcount @@ -298,46 +296,6 @@ static inline void refcount_dec(refcount_t *r) if (unlikely(atomic_fetch_sub_release(1, &r->refs) <= 1)) refcount_warn_saturate(r, REFCOUNT_DEC_LEAK); } -#else /* CONFIG_REFCOUNT_FULL */ -# ifdef CONFIG_ARCH_HAS_REFCOUNT -# include -# else -static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) -{ - return atomic_add_unless(&r->refs, i, 0); -} - -static inline void refcount_add(int i, refcount_t *r) -{ - atomic_add(i, &r->refs); -} - -static inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return atomic_add_unless(&r->refs, 1, 0); -} - -static inline void refcount_inc(refcount_t *r) -{ - atomic_inc(&r->refs); -} - -static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r) -{ - return atomic_sub_and_test(i, &r->refs); -} - -static inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - return atomic_dec_and_test(&r->refs); -} - -static inline void refcount_dec(refcount_t *r) -{ - atomic_dec(&r->refs); -} -# endif /* !CONFIG_ARCH_HAS_REFCOUNT */ -#endif /* !CONFIG_REFCOUNT_FULL */ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); -- cgit v1.2.3 From 2f30b36943adca070f2e1551f701bd524ed1ae5a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 21 Nov 2019 11:59:01 +0000 Subject: locking/refcount: Remove unused 'refcount_error_report()' function 'refcount_error_report()' has no callers. Remove it. Signed-off-by: Will Deacon Reviewed-by: Ard Biesheuvel Acked-by: Kees Cook Tested-by: Hanjun Guo Cc: Ard Biesheuvel Cc: Elena Reshetova Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20191121115902.2551-10-will@kernel.org Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d83d403dac2e..09f759228e3f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -328,13 +328,6 @@ extern int oops_may_print(void); void do_exit(long error_code) __noreturn; void complete_and_exit(struct completion *, long) __noreturn; -#ifdef CONFIG_ARCH_HAS_REFCOUNT -void refcount_error_report(struct pt_regs *regs, const char *err); -#else -static inline void refcount_error_report(struct pt_regs *regs, const char *err) -{ } -#endif - /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); -- cgit v1.2.3 From 0be0ee71816b2b6725e2b4f32ad6726c9d729777 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 11 Nov 2019 15:51:03 -0800 Subject: vfs: properly and reliably lock f_pos in fdget_pos() fdget_pos() is used by file operations that will read and update f_pos: things like "read()", "write()" and "lseek()" (but not, for example, "pread()/pwrite" that get their file positions elsewhere). However, it had two separate escape clauses for this, because not everybody wants or needs serialization of the file position. The first and most obvious case is the "file descriptor doesn't have a position at all", ie a stream-like file. Except we didn't actually use FMODE_STREAM, but instead used FMODE_ATOMIC_POS. The reason for that was that FMODE_STREAM didn't exist back in the days, but also that we didn't want to mark all the special cases, so we only marked the ones that _required_ position atomicity according to POSIX - regular files and directories. The case one was intentionally lazy, but now that we _do_ have FMODE_STREAM we could and should just use it. With the change to use FMODE_STREAM, there are no remaining uses for FMODE_ATOMIC_POS, and all the code to set it is deleted. Any cases where we don't want the serialization because the driver (or subsystem) doesn't use the file position should just be updated to do "stream_open()". We've done that for all the obvious and common situations, we may need a few more. Quoting Kirill Smelkov in the original FMODE_STREAM thread (see link below for full email): "And I appreciate if people could help at least somehow with "getting rid of mixed case entirely" (i.e. always lock f_pos_lock on !FMODE_STREAM), because this transition starts to diverge from my particular use-case too far. To me it makes sense to do that transition as follows: - convert nonseekable_open -> stream_open via stream_open.cocci; - audit other nonseekable_open calls and convert left users that truly don't depend on position to stream_open; - extend stream_open.cocci to analyze alloc_file_pseudo as well (this will cover pipes and sockets), or maybe convert pipes and sockets to FMODE_STREAM manually; - extend stream_open.cocci to analyze file_operations that use no_llseek or noop_llseek, but do not use nonseekable_open or alloc_file_pseudo. This might find files that have stream semantic but are opened differently; - extend stream_open.cocci to analyze file_operations whose .read/.write do not use ppos at all (independently of how file was opened); - ... - after that remove FMODE_ATOMIC_POS and always take f_pos_lock if !FMODE_STREAM; - gather bug reports for deadlocked read/write and convert missed cases to FMODE_STREAM, probably extending stream_open.cocci along the road to catch similar cases i.e. always take f_pos_lock unless a file is explicitly marked as being stream, and try to find and cover all files that are streams" We have not done the "extend stream_open.cocci to analyze alloc_file_pseudo" as well, but the previous commit did manually handle the case of pipes and sockets. The other case where we can avoid locking f_pos is the "this file descriptor only has a single user and it is us, and thus there is no need to lock it". The second test was correct, although a bit subtle and worth just re-iterating here. There are two kinds of other sources of references to the same file descriptor: file descriptors that have been explicitly shared across fork() or with dup(), and file tables having elevated reference counts due to threading (or explicit file sharing with clone()). The first case would have incremented the file count explicitly, and in the second case the previous __fdget() would have incremented it for us and set the FDPUT_FPUT flag. But in both cases the file count would be greater than one, so the "file_count(file) > 1" test catches both situations. Also note that if file_count is 1, that also means that no other thread can have access to the file table, so there also cannot be races with concurrent calls to dup()/fork()/clone() that would increment the file count any other way. Link: https://lore.kernel.org/linux-fsdevel/20190413184404.GA13490@deco.navytux.spb.ru Cc: Kirill Smelkov Cc: Eic Dumazet Cc: Al Viro Cc: Alan Stern Cc: Marco Elver Cc: Andrea Parri Cc: Paul McKenney Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e0d909d35763..a7c3f6dd5701 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -148,8 +148,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) -/* File needs atomic accesses to f_pos */ -#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)0x10000) /* Has read method(s) */ -- cgit v1.2.3 From bec170e55982c2d3b8e1beccadf16e288fe6fb5a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 23 Nov 2019 17:28:37 +0100 Subject: net: phy: add helpers phy_(un)lock_mdio_bus Add helpers to make locking/unlocking the MDIO bus easier. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- include/linux/phy.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index f5cdfb206097..5032d453ac66 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1076,6 +1076,16 @@ static inline const char *phydev_name(const struct phy_device *phydev) return dev_name(&phydev->mdio.dev); } +static inline void phy_lock_mdio_bus(struct phy_device *phydev) +{ + mutex_lock(&phydev->mdio.bus->mdio_lock); +} + +static inline void phy_unlock_mdio_bus(struct phy_device *phydev) +{ + mutex_unlock(&phydev->mdio.bus->mdio_lock); +} + void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) __printf(2, 3); void phy_attached_info(struct phy_device *phydev); -- cgit v1.2.3 From ba30e27405afa0b13b79532a345977b3e58ad501 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 25 Nov 2019 14:28:04 -0800 Subject: Revert "percpu: add __percpu to SHIFT_PERCPU_PTR" This reverts commit 825dbc6ff7a3a063ea91be7d94af940080b0c991. I mistakenly applied this and only now have thought about it a little more and had time to evaluate a kbuild error for dmaengine. Once we're calling RELOC_HIDE, we're moving back into the __kernel address space and letting users interact with the actual memory address rather than in __percpu which is before adding the offsets. Signed-off-by: Dennis Zhou --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index a49b6c702598..a6fabd865211 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -229,7 +229,7 @@ do { \ * pointer value. The weird cast keeps both GCC and sparse happy. */ #define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __percpu __force *)(__p), (__offset)) + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) #define per_cpu_ptr(ptr, cpu) \ ({ \ -- cgit v1.2.3 From bd3ded3146daa2cbb57ed353749ef99cf75371b0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Nov 2019 14:17:16 -0700 Subject: net: add __sys_connect_file() helper This is identical to __sys_connect(), except it takes a struct file instead of an fd, and it also allows passing in extra file->f_flags flags. The latter is done to support masking in O_NONBLOCK without manipulating the original file flags. No functional changes in this patch. Cc: netdev@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/socket.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 09c32a21555b..4bde63021c09 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -399,6 +399,9 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_connect_file(struct file *file, + struct sockaddr __user *uservaddr, int addrlen, + int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); -- cgit v1.2.3 From 863fbae929c7a5b64e96b8a3ffb34a29eefb9f8f Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 14 Nov 2019 15:15:26 -0800 Subject: nvme_fc: add module to ops template to allow module references In nvme-fc: it's possible to have connected active controllers and as no references are taken on the LLDD, the LLDD can be unloaded. The controller would enter a reconnect state and as long as the LLDD resumed within the reconnect timeout, the controller would resume. But if a namespace on the controller is the root device, allowing the driver to unload can be problematic. To reload the driver, it may require new io to the boot device, and as it's no longer connected we get into a catch-22 that eventually fails, and the system locks up. Fix this issue by taking a module reference for every connected controller (which is what the core layer did to the transport module). Reference is cleared when the controller is removed. Acked-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: James Smart Signed-off-by: Keith Busch --- include/linux/nvme-fc-driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 10f81629b9ce..6d0d70f3219c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -270,6 +270,8 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * + * @module: The LLDD module using the interface + * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -383,6 +385,8 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { + struct module *module; + /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); -- cgit v1.2.3 From d41b0e64d206f8948212b4d0f30c330db95c9636 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Oct 2019 12:04:52 +0200 Subject: PCI/MSI: Remove unused pci_irq_get_node() The function pci_irq_get_node() is not used by anyone in the tree, so just delete it. Link: https://lore.kernel.org/r/20191014100452.GA6699@kroah.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Bjorn Helgaas Reviewed-by: Andrew Murray --- include/linux/pci.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f9088c89a534..755d8c0176b9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1454,7 +1454,6 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); -int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1497,11 +1496,6 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } - -static inline int pci_irq_get_node(struct pci_dev *pdev, int vec) -{ - return first_online_node; -} #endif /** -- cgit v1.2.3 From 2be7d348fe924f0c5583c6a805bd42cecda93104 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 26 Nov 2019 11:34:06 -0800 Subject: Revert "vfs: properly and reliably lock f_pos in fdget_pos()" This reverts commit 0be0ee71816b2b6725e2b4f32ad6726c9d729777. I was hoping it would be benign to switch over entirely to FMODE_STREAM, and we'd have just a couple of small fixups we'd need, but it looks like we're not quite there yet. While it worked fine on both my desktop and laptop, they are fairly similar in other respects, and run mostly the same loads. Kenneth Crudup reports that it seems to break both his vmware installation and the KDE upower service. In both cases apparently leading to timeouts due to waitinmg for the f_pos lock. There are a number of character devices in particular that definitely want stream-like behavior, but that currently don't get marked as streams, and as a result get the exclusion between concurrent read()/write() on the same file descriptor. Which doesn't work well for them. The most obvious example if this is /dev/console and /dev/tty, which use console_fops and tty_fops respectively (and ptmx_fops for the pty master side). It may be that it's just this that causes problems, but we clearly weren't ready yet. Because there's a number of other likely common cases that don't have llseek implementations and would seem to act as stream devices: /dev/fuse (fuse_dev_operations) /dev/mcelog (mce_chrdev_ops) /dev/mei0 (mei_fops) /dev/net/tun (tun_fops) /dev/nvme0 (nvme_dev_fops) /dev/tpm0 (tpm_fops) /proc/self/ns/mnt (ns_file_operations) /dev/snd/pcm* (snd_pcm_f_ops[]) and while some of these could be trivially automatically detected by the vfs layer when the character device is opened by just noticing that they have no read or write operations either, it often isn't that obvious. Some character devices most definitely do use the file position, even if they don't allow seeking: the firmware update code, for example, uses simple_read_from_buffer() that does use f_pos, but doesn't allow seeking back and forth. We'll revisit this when there's a better way to detect the problem and fix it (possibly with a coccinelle script to do more of the FMODE_STREAM annotations). Reported-by: Kenneth R. Crudup Cc: Kirill Smelkov Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index dde6dc4492a0..ae6c5c37f3ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -148,6 +148,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) +/* File needs atomic accesses to f_pos */ +#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)0x10000) /* Has read method(s) */ -- cgit v1.2.3 From a8de1304b7df30e3a14f2a8b9709bb4ff31a0385 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 13 Nov 2019 16:12:02 +0900 Subject: libfdt: define INT32_MAX and UINT32_MAX in libfdt_env.h The DTC v1.5.1 added references to (U)INT32_MAX. This is no problem for user-space programs since defines (U)INT32_MAX along with (u)int32_t. For the kernel space, libfdt_env.h needs to be adjusted before we pull in the changes. In the kernel, we usually use s/u32 instead of (u)int32_t for the fixed-width types. Accordingly, we already have S/U32_MAX for their max values. So, we should not add (U)INT32_MAX to any more. Instead, add them to the in-kernel libfdt_env.h to compile the latest libfdt. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- include/linux/libfdt_env.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 2231eb855e8f..cea8574a29b1 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -7,6 +7,9 @@ #include +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; -- cgit v1.2.3 From 82995cc6c5ae4bf4d72edef381a085e52d5b5905 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Mar 2019 16:38:32 +0000 Subject: libceph, rbd, ceph: convert to use the new mount API Convert the ceph filesystem to the new internal mount API as the old one will be obsoleted and removed. This allows greater flexibility in communication of mount parameters between userspace, the VFS and the filesystem. See Documentation/filesystems/mount_api.txt for more information. [ Numerous string handling, leak and regression fixes; rbd conversion was particularly broken and had to be redone almost from scratch. ] Signed-off-by: David Howells Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index b9dbda1c26aa..8fe9b80e80a5 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -280,10 +280,12 @@ extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern void *ceph_kvmalloc(size_t size, gfp_t flags); -extern struct ceph_options *ceph_parse_options(char *options, - const char *dev_name, const char *dev_name_end, - int (*parse_extra_token)(char *c, void *private), - void *private); +struct fs_parameter; +struct ceph_options *ceph_alloc_options(void); +int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, + struct fs_context *fc); +int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, + struct fs_context *fc); int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, bool show_all); extern void ceph_destroy_options(struct ceph_options *opt); -- cgit v1.2.3 From c1d084759c95ecd0ef08274654a1f6c4f343cdcd Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 6 Nov 2019 10:50:19 +0100 Subject: i2c: replace i2c_new_probed_device with an ERR_PTR variant In the general move to have i2c_new_*_device functions which return ERR_PTR instead of NULL, this patch converts i2c_new_probed_device(). There are only few users, so this patch converts the I2C core and all users in one go. The function gets renamed to i2c_new_scanned_device() so out-of-tree users will get a build failure to understand they need to adapt their error checking code. Signed-off-by: Wolfram Sang Reviewed-by: Luca Ceresoli Reviewed-by: Max Staudt Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 88b825601f3d..d2f786706657 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -452,10 +452,16 @@ i2c_new_client_device(struct i2c_adapter *adap, struct i2c_board_info const *inf * a default probing method is used. */ extern struct i2c_client * +i2c_new_scanned_device(struct i2c_adapter *adap, + struct i2c_board_info *info, + unsigned short const *addr_list, + int (*probe)(struct i2c_adapter *adap, unsigned short addr)); + +extern struct i2c_client * i2c_new_probed_device(struct i2c_adapter *adap, - struct i2c_board_info *info, - unsigned short const *addr_list, - int (*probe)(struct i2c_adapter *adap, unsigned short addr)); + struct i2c_board_info *info, + unsigned short const *addr_list, + int (*probe)(struct i2c_adapter *adap, unsigned short addr)); /* Common custom probe functions */ extern int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); -- cgit v1.2.3 From 031097d9e079e40dce401031d1012e83d80eaf01 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Nov 2019 12:16:41 -0800 Subject: net: skmsg: fix TLS 1.3 crash with full sk_msg TLS 1.3 started using the entry at the end of the SG array for chaining-in the single byte content type entry. This mostly works: [ E E E E E E . . ] ^ ^ start end E < content type / [ E E E E E E C . ] ^ ^ start end (Where E denotes a populated SG entry; C denotes a chaining entry.) If the array is full, however, the end will point to the start: [ E E E E E E E E ] ^ start end And we end up overwriting the start: E < content type / [ C E E E E E E E ] ^ start end The sg array is supposed to be a circular buffer with start and end markers pointing anywhere. In case where start > end (i.e. the circular buffer has "wrapped") there is an extra entry reserved at the end to chain the two halves together. [ E E E E E E . . l ] (Where l is the reserved entry for "looping" back to front. As suggested by John, let's reserve another entry for chaining SG entries after the main circular buffer. Note that this entry has to be pointed to by the end entry so its position is not fixed. Examples of full messages: [ E E E E E E E E . l ] ^ ^ start end <---------------. [ E E . E E E E E E l ] ^ ^ end start Now the end will always point to an unused entry, so TLS 1.3 can always use it. Fixes: 130b392c6cd6 ("net: tls: Add tls 1.3 support") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/skmsg.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 6cb077b646a5..ef7031f8a304 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -14,6 +14,7 @@ #include #define MAX_MSG_FRAGS MAX_SKB_FRAGS +#define NR_MSG_FRAG_IDS (MAX_MSG_FRAGS + 1) enum __sk_action { __SK_DROP = 0, @@ -29,13 +30,15 @@ struct sk_msg_sg { u32 size; u32 copybreak; unsigned long copy; - /* The extra element is used for chaining the front and sections when - * the list becomes partitioned (e.g. end < start). The crypto APIs - * require the chaining. + /* The extra two elements: + * 1) used for chaining the front and sections when the list becomes + * partitioned (e.g. end < start). The crypto APIs require the + * chaining; + * 2) to chain tailer SG entries after the message. */ - struct scatterlist data[MAX_MSG_FRAGS + 1]; + struct scatterlist data[MAX_MSG_FRAGS + 2]; }; -static_assert(BITS_PER_LONG >= MAX_MSG_FRAGS); +static_assert(BITS_PER_LONG >= NR_MSG_FRAG_IDS); /* UAPI in filter.c depends on struct sk_msg_sg being first element. */ struct sk_msg { @@ -142,13 +145,13 @@ static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) static inline u32 sk_msg_iter_dist(u32 start, u32 end) { - return end >= start ? end - start : end + (MAX_MSG_FRAGS - start); + return end >= start ? end - start : end + (NR_MSG_FRAG_IDS - start); } #define sk_msg_iter_var_prev(var) \ do { \ if (var == 0) \ - var = MAX_MSG_FRAGS - 1; \ + var = NR_MSG_FRAG_IDS - 1; \ else \ var--; \ } while (0) @@ -156,7 +159,7 @@ static inline u32 sk_msg_iter_dist(u32 start, u32 end) #define sk_msg_iter_var_next(var) \ do { \ var++; \ - if (var == MAX_MSG_FRAGS) \ + if (var == NR_MSG_FRAG_IDS) \ var = 0; \ } while (0) @@ -173,9 +176,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg) static inline void sk_msg_init(struct sk_msg *msg) { - BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS); + BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); memset(msg, 0, sizeof(*msg)); - sg_init_marker(msg->sg.data, MAX_MSG_FRAGS); + sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); } static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, @@ -196,14 +199,11 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) static inline bool sk_msg_full(const struct sk_msg *msg) { - return (msg->sg.end == msg->sg.start) && msg->sg.size; + return sk_msg_iter_dist(msg->sg.start, msg->sg.end) == MAX_MSG_FRAGS; } static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { - if (sk_msg_full(msg)) - return MAX_MSG_FRAGS; - return sk_msg_iter_dist(msg->sg.start, msg->sg.end); } -- cgit v1.2.3 From ba1e78a1dc0ca3e92f0be82279e6ba24177af7d6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 21 Nov 2019 19:41:51 +0100 Subject: cpuidle: Drop disabled field from struct cpuidle_state After recent cpuidle updates the "disabled" field in struct cpuidle_state is only used by two drivers (intel_idle and shmobile cpuidle) for marking unusable idle states, but that may as well be achieved with the help of a state flag, so define an "unusable" idle state flag, CPUIDLE_FLAG_UNUSABLE, make the drivers in question use it instead of the "disabled" field and make the core set CPUIDLE_STATE_DISABLED_BY_DRIVER for the idle states with that flag set. After the above changes, the "disabled" field in struct cpuidle_state is not used any more, so drop it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 2dbe46b7c213..1dabe36bd011 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -54,7 +54,6 @@ struct cpuidle_state { unsigned int exit_latency; /* in US */ int power_usage; /* in mW */ unsigned int target_residency; /* in US */ - bool disabled; /* disabled on all CPUs */ int (*enter) (struct cpuidle_device *dev, struct cpuidle_driver *drv, @@ -77,6 +76,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ #define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ #define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ +#define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; -- cgit v1.2.3 From c6a3aea93571a5393602256d8f74772bd64c8225 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 26 Nov 2019 17:17:11 +0200 Subject: PM / QoS: Redefine FREQ_QOS_MAX_DEFAULT_VALUE to S32_MAX QOS requests for DEFAULT_VALUE are supposed to be ignored but this is not the case for FREQ_QOS_MAX. Adding one request for MAX_DEFAULT_VALUE and one for a real value will cause freq_qos_read_value to unexpectedly return MAX_DEFAULT_VALUE (-1). This happens because freq_qos max value is aggregated with PM_QOS_MIN but FREQ_QOS_MAX_DEFAULT_VALUE is (-1) so it's smaller than other values. Fix this by redefining FREQ_QOS_MAX_DEFAULT_VALUE to S32_MAX. Looking at current users for freq_qos it seems that none of them create requests for FREQ_QOS_MAX_DEFAULT_VALUE. Fixes: 77751a466ebd ("PM: QoS: Introduce frequency QoS") Signed-off-by: Leonard Crestez Reported-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Cc: 5.4+ # 5.4+ Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index ebf5ef17cc2a..24a6263c9931 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -256,7 +256,7 @@ static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) #endif #define FREQ_QOS_MIN_DEFAULT_VALUE 0 -#define FREQ_QOS_MAX_DEFAULT_VALUE (-1) +#define FREQ_QOS_MAX_DEFAULT_VALUE S32_MAX enum freq_qos_req_type { FREQ_QOS_MIN = 1, -- cgit v1.2.3 From 342035f66c866f4ad750477b21b210e98d1f6818 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 26 Nov 2019 17:17:12 +0200 Subject: PM / QoS: Reorder pm_qos/freq_qos/dev_pm_qos structs This allows dev_pm_qos to embed freq_qos structs, which is done in the next patch. Separate commit to make it easier to review. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 74 ++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 24a6263c9931..678fec6da5b9 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -49,21 +49,6 @@ struct pm_qos_flags_request { s32 flags; /* Do not change to 64 bit */ }; -enum dev_pm_qos_req_type { - DEV_PM_QOS_RESUME_LATENCY = 1, - DEV_PM_QOS_LATENCY_TOLERANCE, - DEV_PM_QOS_FLAGS, -}; - -struct dev_pm_qos_request { - enum dev_pm_qos_req_type type; - union { - struct plist_node pnode; - struct pm_qos_flags_request flr; - } data; - struct device *dev; -}; - enum pm_qos_type { PM_QOS_UNITIALIZED, PM_QOS_MAX, /* return the largest value */ @@ -90,6 +75,44 @@ struct pm_qos_flags { s32 effective_flags; /* Do not change to 64 bit */ }; + +#define FREQ_QOS_MIN_DEFAULT_VALUE 0 +#define FREQ_QOS_MAX_DEFAULT_VALUE S32_MAX + +enum freq_qos_req_type { + FREQ_QOS_MIN = 1, + FREQ_QOS_MAX, +}; + +struct freq_constraints { + struct pm_qos_constraints min_freq; + struct blocking_notifier_head min_freq_notifiers; + struct pm_qos_constraints max_freq; + struct blocking_notifier_head max_freq_notifiers; +}; + +struct freq_qos_request { + enum freq_qos_req_type type; + struct plist_node pnode; + struct freq_constraints *qos; +}; + + +enum dev_pm_qos_req_type { + DEV_PM_QOS_RESUME_LATENCY = 1, + DEV_PM_QOS_LATENCY_TOLERANCE, + DEV_PM_QOS_FLAGS, +}; + +struct dev_pm_qos_request { + enum dev_pm_qos_req_type type; + union { + struct plist_node pnode; + struct pm_qos_flags_request flr; + } data; + struct device *dev; +}; + struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; @@ -255,27 +278,6 @@ static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev) } #endif -#define FREQ_QOS_MIN_DEFAULT_VALUE 0 -#define FREQ_QOS_MAX_DEFAULT_VALUE S32_MAX - -enum freq_qos_req_type { - FREQ_QOS_MIN = 1, - FREQ_QOS_MAX, -}; - -struct freq_constraints { - struct pm_qos_constraints min_freq; - struct blocking_notifier_head min_freq_notifiers; - struct pm_qos_constraints max_freq; - struct blocking_notifier_head max_freq_notifiers; -}; - -struct freq_qos_request { - enum freq_qos_req_type type; - struct plist_node pnode; - struct freq_constraints *qos; -}; - static inline int freq_qos_request_active(struct freq_qos_request *req) { return !IS_ERR_OR_NULL(req->qos); -- cgit v1.2.3 From 36a8015f89e40f7c9c91cc7e6d028fa288dad27b Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Tue, 26 Nov 2019 17:17:13 +0200 Subject: PM / QoS: Restore DEV_PM_QOS_MIN/MAX_FREQUENCY Support for adding per-device frequency limits was removed in commit 2aac8bdf7a0f ("PM: QoS: Drop frequency QoS types from device PM QoS") after cpufreq switched to use a new "freq_constraints" construct. Restore support for per-device freq limits but base this upon freq_constraints. This is primarily meant to be used by the devfreq subsystem. This removes the "static" marking on freq_qos_apply but does not export it for modules. Signed-off-by: Leonard Crestez Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 678fec6da5b9..19eafca5680e 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -34,6 +34,8 @@ enum pm_qos_flags_status { #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 +#define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0 +#define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE FREQ_QOS_MAX_DEFAULT_VALUE #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) @@ -101,6 +103,8 @@ struct freq_qos_request { enum dev_pm_qos_req_type { DEV_PM_QOS_RESUME_LATENCY = 1, DEV_PM_QOS_LATENCY_TOLERANCE, + DEV_PM_QOS_MIN_FREQUENCY, + DEV_PM_QOS_MAX_FREQUENCY, DEV_PM_QOS_FLAGS, }; @@ -109,6 +113,7 @@ struct dev_pm_qos_request { union { struct plist_node pnode; struct pm_qos_flags_request flr; + struct freq_qos_request freq; } data; struct device *dev; }; @@ -116,6 +121,7 @@ struct dev_pm_qos_request { struct dev_pm_qos { struct pm_qos_constraints resume_latency; struct pm_qos_constraints latency_tolerance; + struct freq_constraints freq; struct pm_qos_flags flags; struct dev_pm_qos_request *resume_latency_req; struct dev_pm_qos_request *latency_tolerance_req; @@ -214,6 +220,10 @@ static inline s32 dev_pm_qos_read_value(struct device *dev, switch (type) { case DEV_PM_QOS_RESUME_LATENCY: return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + case DEV_PM_QOS_MIN_FREQUENCY: + return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE; + case DEV_PM_QOS_MAX_FREQUENCY: + return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE; default: WARN_ON(1); return 0; @@ -293,6 +303,8 @@ int freq_qos_add_request(struct freq_constraints *qos, enum freq_qos_req_type type, s32 value); int freq_qos_update_request(struct freq_qos_request *req, s32 new_value); int freq_qos_remove_request(struct freq_qos_request *req); +int freq_qos_apply(struct freq_qos_request *req, + enum pm_qos_req_action action, s32 value); int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, -- cgit v1.2.3 From dc0a7f7558dd52e972408ebb535b0153c06d08c2 Mon Sep 17 00:00:00 2001 From: Pengfei Li Date: Sat, 30 Nov 2019 17:49:25 -0800 Subject: mm, slab: remove unused kmalloc_size() The size of kmalloc can be obtained from kmalloc_info[], so remove kmalloc_size() that will not be used anymore. Link: http://lkml.kernel.org/r/1569241648-26908-3-git-send-email-lpf.vector@gmail.com Signed-off-by: Pengfei Li Acked-by: Vlastimil Babka Acked-by: Roman Gushchin Acked-by: David Rientjes Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 4d2a2fa55ed5..877a95c6a2d2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -561,26 +561,6 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } -/* - * Determine size used for the nth kmalloc cache. - * return size or 0 if a kmalloc cache for that - * size does not exist - */ -static __always_inline unsigned int kmalloc_size(unsigned int n) -{ -#ifndef CONFIG_SLOB - if (n > 2) - return 1U << n; - - if (n == 1 && KMALLOC_MIN_SIZE <= 32) - return 96; - - if (n == 2 && KMALLOC_MIN_SIZE <= 64) - return 192; -#endif - return 0; -} - static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB -- cgit v1.2.3 From a92853b6746fe5ffef20a7c30addf6320561e669 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sat, 30 Nov 2019 17:49:44 -0800 Subject: fs/direct-io.c: keep dio_warn_stale_pagecache() when CONFIG_BLOCK=n This helper prints warning if direct I/O write failed to invalidate cache, and set EIO at inode to warn usersapce about possible data corruption. See also commit 5a9d929d6e13 ("iomap: report collisions between directio and buffered writes to userspace"). Direct I/O is supported by non-disk filesystems, for example NFS. Thus generic code needs this even in kernel without CONFIG_BLOCK. Link: http://lkml.kernel.org/r/157270038074.4812.7980855544557488880.stgit@buzz Signed-off-by: Konstantin Khlebnikov Reviewed-by: Andrew Morton Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ae6c5c37f3ae..eeed80fab36a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3149,7 +3149,6 @@ enum { }; void dio_end_io(struct bio *bio); -void dio_warn_stale_pagecache(struct file *filp); ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, @@ -3194,6 +3193,11 @@ static inline void inode_dio_end(struct inode *inode) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } +/* + * Warn about a page cache invalidation failure diring a direct I/O write. + */ +void dio_warn_stale_pagecache(struct file *filp); + extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); -- cgit v1.2.3 From 9da83f3fc74b806ee419a29977ef0239454bd8ec Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 30 Nov 2019 17:50:03 -0800 Subject: mm, memcg: clean up reclaim iter array The mem_cgroup_reclaim_cookie is only used in memcg softlimit reclaim now, and the priority of the reclaim is always 0. We don't need to define the iter in struct mem_cgroup_per_node as an array any more. That could make the code more clear and save some space. Link: http://lkml.kernel.org/r/1569897728-1686-1-git-send-email-laoar.shao@gmail.com Signed-off-by: Yafang Shao Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ae703ea3ef48..2b34925fc19d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -58,7 +58,6 @@ enum mem_cgroup_protection { struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; - int priority; unsigned int generation; }; @@ -126,7 +125,7 @@ struct mem_cgroup_per_node { unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; - struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; + struct mem_cgroup_reclaim_iter iter; struct memcg_shrinker_map __rcu *shrinker_map; -- cgit v1.2.3 From 242c37b459ce9ea1be53b75bdb76a7d9268a0791 Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Sat, 30 Nov 2019 17:50:12 -0800 Subject: include/linux/memcontrol.h: fix comments based on per-node memcg These comments should be updated as memcg limit enforcement has been moved from zones to nodes. Link: http://lkml.kernel.org/r/20191022150618.GA15519@haolee.github.io Signed-off-by: Hao Lee Acked-by: Roman Gushchin Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2b34925fc19d..e82928deea88 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -111,7 +111,7 @@ struct memcg_shrinker_map { }; /* - * per-zone information in memory controller. + * per-node information in memory controller. */ struct mem_cgroup_per_node { struct lruvec lruvec; @@ -398,8 +398,7 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) * @memcg: memcg of the wanted lruvec * * Returns the lru list vector holding pages for a given @node or a given - * @memcg and @zone. This can be the node lruvec, if the memory controller - * is disabled. + * @memcg. This can be the node lruvec, if the memory controller is disabled. */ static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, struct mem_cgroup *memcg) -- cgit v1.2.3 From fa40d1ee9f156624658ca409a04a78882ca5b3c5 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 30 Nov 2019 17:50:16 -0800 Subject: mm: vmscan: memcontrol: remove mem_cgroup_select_victim_node() Since commit 1ba6fc9af35b ("mm: vmscan: do not share cgroup iteration between reclaimers"), the memcg reclaim does not bail out earlier based on sc->nr_reclaimed and will traverse all the nodes. All the reclaimable pages of the memcg on all the nodes will be scanned relative to the reclaim priority. So, there is no need to maintain state regarding which node to start the memcg reclaim from. This patch effectively reverts the commit 889976dbcb12 ("memcg: reclaim memory from nodes in round-robin order") and commit 453a9bf347f1 ("memcg: fix numa scan information update to be triggered by memory event"). [shakeelb@google.com: v2] Link: http://lkml.kernel.org/r/20191030204232.139424-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20191029234753.224143-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e82928deea88..239e752a7817 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -80,7 +80,6 @@ struct mem_cgroup_id { enum mem_cgroup_events_target { MEM_CGROUP_TARGET_THRESH, MEM_CGROUP_TARGET_SOFTLIMIT, - MEM_CGROUP_TARGET_NUMAINFO, MEM_CGROUP_NTARGETS, }; @@ -312,13 +311,6 @@ struct mem_cgroup { struct list_head kmem_caches; #endif - int last_scanned_node; -#if MAX_NUMNODES > 1 - nodemask_t scan_nodes; - atomic_t numainfo_events; - atomic_t numainfo_updating; -#endif - #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; struct wb_domain cgwb_domain; -- cgit v1.2.3 From b3d1411b6726ea6930222f8f12587d89762477c6 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 30 Nov 2019 17:50:30 -0800 Subject: mm: emit tracepoint when RSS changes Useful to track how RSS is changing per TGID to detect spikes in RSS and memory hogs. Several Android teams have been using this patch in various kernel trees for half a year now. Many reported to me it is really useful so I'm posting it upstream. Initial patch developed by Tim Murray. Changes I made from original patch: o Prevent any additional space consumed by mm_struct. Regarding the fact that the RSS may change too often thus flooding the traces - note that, there is some "hysterisis" with this already. That is - We update the counter only if we receive 64 page faults due to SPLIT_RSS_ACCOUNTING. However, during zapping or copying of pte range, the RSS is updated immediately which can become noisy/flooding. In a previous discussion, we agreed that BPF or ftrace can be used to rate limit the signal if this becomes an issue. Also note that I added wrappers to trace_rss_stat to prevent compiler errors where linux/mm.h is included from tracing code, causing errors such as: CC kernel/trace/power-traces.o In file included from ./include/trace/define_trace.h:102, from ./include/trace/events/kmem.h:342, from ./include/linux/mm.h:31, from ./include/linux/ring_buffer.h:5, from ./include/linux/trace_events.h:6, from ./include/trace/events/power.h:12, from kernel/trace/power-traces.c:15: ./include/trace/trace_events.h:113:22: error: field `ent' has incomplete type struct trace_entry ent; \ Link: http://lore.kernel.org/r/20190903200905.198642-1-joel@joelfernandes.org Link: http://lkml.kernel.org/r/20191001172817.234886-1-joel@joelfernandes.org Co-developed-by: Tim Murray Signed-off-by: Tim Murray Signed-off-by: Joel Fernandes (Google) Acked-by: Michal Hocko Cc: Carmen Jackson Cc: Mayank Gupta Cc: Daniel Colascione Cc: Steven Rostedt (VMware) Cc: Minchan Kim Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: Jerome Glisse Cc: Matthew Wilcox Cc: Ralph Campbell Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f6fb714fa851..935383081397 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1643,19 +1643,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return (unsigned long)val; } +void mm_trace_rss_stat(int member, long count); + static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { - atomic_long_add(value, &mm->rss_stat.count[member]); + long count = atomic_long_add_return(value, &mm->rss_stat.count[member]); + + mm_trace_rss_stat(member, count); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { - atomic_long_inc(&mm->rss_stat.count[member]); + long count = atomic_long_inc_return(&mm->rss_stat.count[member]); + + mm_trace_rss_stat(member, count); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { - atomic_long_dec(&mm->rss_stat.count[member]); + long count = atomic_long_dec_return(&mm->rss_stat.count[member]); + + mm_trace_rss_stat(member, count); } /* Optimized variant when page is already known not to be PageAnon */ -- cgit v1.2.3 From e4dcad204d3a281be6f8573e0a82648a4ad84e69 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sat, 30 Nov 2019 17:50:33 -0800 Subject: rss_stat: add support to detect RSS updates of external mm When a process updates the RSS of a different process, the rss_stat tracepoint appears in the context of the process doing the update. This can confuse userspace that the RSS of process doing the update is updated, while in reality a different process's RSS was updated. This issue happens in reclaim paths such as with direct reclaim or background reclaim. This patch adds more information to the tracepoint about whether the mm being updated belongs to the current process's context (curr field). We also include a hash of the mm pointer so that the process who the mm belongs to can be uniquely identified (mm_id field). Also vsprintf.c is refactored a bit to allow reuse of hashing code. [akpm@linux-foundation.org: remove unused local `str'] [joelaf@google.com: inline call to ptr_to_hashval] Link: http://lore.kernel.org/r/20191113153816.14b95acd@gandalf.local.home Link: http://lkml.kernel.org/r/20191114164622.GC233237@google.com Link: http://lkml.kernel.org/r/20191106024452.81923-1-joel@joelfernandes.org Signed-off-by: Joel Fernandes (Google) Reported-by: Ioannis Ilkos Acked-by: Petr Mladek [lib/vsprintf.c] Cc: Tim Murray Cc: Michal Hocko Cc: Carmen Jackson Cc: Mayank Gupta Cc: Daniel Colascione Cc: Steven Rostedt (VMware) Cc: Minchan Kim Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: Jerome Glisse Cc: Matthew Wilcox Cc: Ralph Campbell Cc: Vlastimil Babka Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++---- include/linux/string.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 935383081397..b5b2523c80af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1643,27 +1643,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return (unsigned long)val; } -void mm_trace_rss_stat(int member, long count); +void mm_trace_rss_stat(struct mm_struct *mm, int member, long count); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { long count = atomic_long_add_return(value, &mm->rss_stat.count[member]); - mm_trace_rss_stat(member, count); + mm_trace_rss_stat(mm, member, count); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { long count = atomic_long_inc_return(&mm->rss_stat.count[member]); - mm_trace_rss_stat(member, count); + mm_trace_rss_stat(mm, member, count); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { long count = atomic_long_dec_return(&mm->rss_stat.count[member]); - mm_trace_rss_stat(member, count); + mm_trace_rss_stat(mm, member, count); } /* Optimized variant when page is already known not to be PageAnon */ diff --git a/include/linux/string.h b/include/linux/string.h index b6ccdc2c7f02..02894e417565 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -216,6 +216,8 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, const void *from, size_t available); +int ptr_to_hashval(const void *ptr, unsigned long *hashval_out); + /** * strstarts - does @str start with @prefix? * @str: string to examine -- cgit v1.2.3 From bf1a12a8095615c9486f5463ca473d2d69ff6952 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Sat, 30 Nov 2019 17:51:29 -0800 Subject: mm: move the backup x_devmap() functions to asm-generic/pgtable.h The asm-generic/pgtable.h include file appears to be the correct place for the backup x_devmap() inline functions. Moving them here is also necessary if we want to include x_devmap() in the [pmd|pud]_unstable functions. So move the x_devmap() functions to asm-generic/pgtable.h Link: http://lkml.kernel.org/r/20191115115808.21181-1-thomas_os@shipmail.org Signed-off-by: Thomas Hellstrom Cc: Arnd Bergmann Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b5b2523c80af..06b51d8728ec 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -564,21 +564,6 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; -#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline int pmd_devmap(pmd_t pmd) -{ - return 0; -} -static inline int pud_devmap(pud_t pud) -{ - return 0; -} -static inline int pgd_devmap(pgd_t pgd) -{ - return 0; -} -#endif - /* * FIXME: take this include out, include page-flags.h in * files which need it (119 of them) -- cgit v1.2.3 From e1608f3fa857b600045b6df7f7dadc70eeaa4496 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 29 Nov 2019 23:29:11 +0100 Subject: bpf: Avoid setting bpf insns pages read-only when prog is jited For the case where the interpreter is compiled out or when the prog is jited it is completely unnecessary to set the BPF insn pages as read-only. In fact, on frequent churn of BPF programs, it could lead to performance degradation of the system over time since it would break the direct map down to 4k pages when calling set_memory_ro() for the insn buffer on x86-64 / arm64 and there is no reverse operation. Thus, avoid breaking up large pages for data maps, and only limit this to the module range used by the JIT where it is necessary to set the image read-only and executable. Suggested-by: Peter Zijlstra Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191129222911.3710-1-daniel@iogearbox.net --- include/linux/filter.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 1b1e8b8f88da..a141cb07e76a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -776,8 +776,12 @@ bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - set_vm_flush_reset_perms(fp); - set_memory_ro((unsigned long)fp, fp->pages); +#ifndef CONFIG_BPF_JIT_ALWAYS_ON + if (!fp->jited) { + set_vm_flush_reset_perms(fp); + set_memory_ro((unsigned long)fp, fp->pages); + } +#endif } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) -- cgit v1.2.3 From feec24a6139d4640c6ef344e0271a8cd4d509e60 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Sat, 30 Nov 2019 17:53:38 -0800 Subject: mm, soft-offline: convert parameter to pfn Currently soft_offline_page() receives struct page, and its sibling memory_failure() receives pfn. This discrepancy looks weird and makes precheck on pfn validity tricky. So let's align them. Link: http://lkml.kernel.org/r/20191016234706.GA5493@www9186uo.sakura.ne.jp Signed-off-by: Naoya Horiguchi Acked-by: Andrew Morton Cc: David Hildenbrand Cc: Michal Hocko Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 06b51d8728ec..19a0e687878a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2773,7 +2773,7 @@ extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages __read_mostly; -extern int soft_offline_page(struct page *page, int flags); +extern int soft_offline_page(unsigned long pfn, int flags); /* -- cgit v1.2.3 From 18db149120c106cf2b1a2595f82f3229f9d223b8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Sat, 30 Nov 2019 17:53:51 -0800 Subject: mm/memory_hotplug: export generic_online_page() Patch series "mm/memory_hotplug: Export generic_online_page()". Let's replace the __online_page...() functions by generic_online_page(). Hyper-V only wants to delay the actual onlining of un-backed pages, so we can simpy re-use the generic function. This patch (of 3): Let's expose generic_online_page() so online_page_callback users can simply fall back to the generic implementation when actually deciding to online the pages. Link: http://lkml.kernel.org/r/20190909114830.662-2-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Cc: Oscar Salvador Cc: Pavel Tatashin Cc: Dan Williams Cc: Wei Yang Cc: Qian Cai Cc: Haiyang Zhang Cc: "K. Y. Srinivasan" Cc: Sasha Levin Cc: Stephen Hemminger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index f46ea71b4ffd..3b3b1c7641fe 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -102,6 +102,7 @@ extern unsigned long __offline_isolated_pages(unsigned long start_pfn, typedef void (*online_page_callback_t)(struct page *page, unsigned int order); +extern void generic_online_page(struct page *page, unsigned int order); extern int set_online_page_callback(online_page_callback_t callback); extern int restore_online_page_callback(online_page_callback_t callback); -- cgit v1.2.3 From 0ec47097434847c0c3a3bb7287feb46386a62720 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Sat, 30 Nov 2019 17:54:00 -0800 Subject: mm/memory_hotplug: remove __online_page_free() and __online_page_increment_counters() Let's drop the now unused functions. Link: http://lkml.kernel.org/r/20190909114830.662-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michal Hocko Cc: Oscar Salvador Cc: Pavel Tatashin Cc: Wei Yang Cc: Dan Williams Cc: Qian Cai Cc: Haiyang Zhang Cc: "K. Y. Srinivasan" Cc: Sasha Levin Cc: Stephen Hemminger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3b3b1c7641fe..fb638cadf8c0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -107,8 +107,6 @@ extern int set_online_page_callback(online_page_callback_t callback); extern int restore_online_page_callback(online_page_callback_t callback); extern void __online_page_set_limits(struct page *page); -extern void __online_page_increment_counters(struct page *page); -extern void __online_page_free(struct page *page); extern int try_online_node(int nid); -- cgit v1.2.3 From 756d25be457fc5497da0ceee0f3d0c9eb4d8535d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Sat, 30 Nov 2019 17:54:07 -0800 Subject: mm/page_isolation.c: convert SKIP_HWPOISON to MEMORY_OFFLINE We have two types of users of page isolation: 1. Memory offlining: Offline memory so it can be unplugged. Memory won't be touched. 2. Memory allocation: Allocate memory (e.g., alloc_contig_range()) to become the owner of the memory and make use of it. For example, in case we want to offline memory, we can ignore (skip over) PageHWPoison() pages, as the memory won't get used. We can allow to offline memory. In contrast, we don't want to allow to allocate such memory. Let's generalize the approach so we can special case other types of pages we want to skip over in case we offline memory. While at it, also pass the same flags to test_pages_isolated(). Link: http://lkml.kernel.org/r/20191021172353.3056-3-david@redhat.com Signed-off-by: David Hildenbrand Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Oscar Salvador Cc: Anshuman Khandual Cc: David Hildenbrand Cc: Pingfan Liu Cc: Qian Cai Cc: Pavel Tatashin Cc: Dan Williams Cc: Vlastimil Babka Cc: Mel Gorman Cc: Mike Rapoport Cc: Alexander Duyck Cc: Mike Rapoport Cc: Pavel Tatashin Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-isolation.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 1099c2fee20f..6861df759fad 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -30,7 +30,7 @@ static inline bool is_migrate_isolate(int migratetype) } #endif -#define SKIP_HWPOISON 0x1 +#define MEMORY_OFFLINE 0x1 #define REPORT_FAILURE 0x2 bool has_unmovable_pages(struct zone *zone, struct page *page, int count, @@ -58,7 +58,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, * Test all pages in [start_pfn, end_pfn) are isolated or not. */ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, - bool skip_hwpoisoned_pages); + int isol_flags); struct page *alloc_migrate_target(struct page *page, unsigned long private); -- cgit v1.2.3 From aba9817da150e9dcf4c599c0508c38d1971d66e1 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Sat, 30 Nov 2019 17:54:10 -0800 Subject: include/linux/memory_hotplug.h: move definitions of {set,clear}_zone_contiguous The {set,clear}_zone_contiguous are built whatever the configuratoon so move the definitions outside the current ifdef to avoid the following compiler warnings: mm/page_alloc.c:1550:6: warning: no previous prototype for 'set_zone_contiguous' [-Wmissing-prototypes] mm/page_alloc.c:1571:6: warning: no previous prototype for 'clear_zone_contiguous' [-Wmissing-prototypes] Link: http://lkml.kernel.org/r/20191106123911.7435-1-ben.dooks@codethink.co.uk Signed-off-by: Ben Dooks (Codethink) Acked-by: Michal Hocko Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index fb638cadf8c0..101d97e7e2ac 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -228,9 +228,6 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); -extern void set_zone_contiguous(struct zone *zone); -extern void clear_zone_contiguous(struct zone *zone); - #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ @@ -338,6 +335,9 @@ static inline int remove_memory(int nid, u64 start, u64 size) static inline void __remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + extern void __ref free_area_init_core_hotplug(int nid); extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); -- cgit v1.2.3 From 3c5c3cfb9ef4da957e3357a2bd36f76ee34c0862 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sat, 30 Nov 2019 17:54:50 -0800 Subject: kasan: support backing vmalloc space with real shadow memory Patch series "kasan: support backing vmalloc space with real shadow memory", v11. Currently, vmalloc space is backed by the early shadow page. This means that kasan is incompatible with VMAP_STACK. This series provides a mechanism to back vmalloc space with real, dynamically allocated memory. I have only wired up x86, because that's the only currently supported arch I can work with easily, but it's very easy to wire up other architectures, and it appears that there is some work-in-progress code to do this on arm64 and s390. This has been discussed before in the context of VMAP_STACK: - https://bugzilla.kernel.org/show_bug.cgi?id=202009 - https://lkml.org/lkml/2018/7/22/198 - https://lkml.org/lkml/2019/7/19/822 In terms of implementation details: Most mappings in vmalloc space are small, requiring less than a full page of shadow space. Allocating a full shadow page per mapping would therefore be wasteful. Furthermore, to ensure that different mappings use different shadow pages, mappings would have to be aligned to KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE. Instead, share backing space across multiple mappings. Allocate a backing page when a mapping in vmalloc space uses a particular page of the shadow region. This page can be shared by other vmalloc mappings later on. We hook in to the vmap infrastructure to lazily clean up unused shadow memory. Testing with test_vmalloc.sh on an x86 VM with 2 vCPUs shows that: - Turning on KASAN, inline instrumentation, without vmalloc, introuduces a 4.1x-4.2x slowdown in vmalloc operations. - Turning this on introduces the following slowdowns over KASAN: * ~1.76x slower single-threaded (test_vmalloc.sh performance) * ~2.18x slower when both cpus are performing operations simultaneously (test_vmalloc.sh sequential_test_order=1) This is unfortunate but given that this is a debug feature only, not the end of the world. The benchmarks are also a stress-test for the vmalloc subsystem: they're not indicative of an overall 2x slowdown! This patch (of 4): Hook into vmalloc and vmap, and dynamically allocate real shadow memory to back the mappings. Most mappings in vmalloc space are small, requiring less than a full page of shadow space. Allocating a full shadow page per mapping would therefore be wasteful. Furthermore, to ensure that different mappings use different shadow pages, mappings would have to be aligned to KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE. Instead, share backing space across multiple mappings. Allocate a backing page when a mapping in vmalloc space uses a particular page of the shadow region. This page can be shared by other vmalloc mappings later on. We hook in to the vmap infrastructure to lazily clean up unused shadow memory. To avoid the difficulties around swapping mappings around, this code expects that the part of the shadow region that covers the vmalloc space will not be covered by the early shadow page, but will be left unmapped. This will require changes in arch-specific code. This allows KASAN with VMAP_STACK, and may be helpful for architectures that do not have a separate module space (e.g. powerpc64, which I am currently working on). It also allows relaxing the module alignment back to PAGE_SIZE. Testing with test_vmalloc.sh on an x86 VM with 2 vCPUs shows that: - Turning on KASAN, inline instrumentation, without vmalloc, introuduces a 4.1x-4.2x slowdown in vmalloc operations. - Turning this on introduces the following slowdowns over KASAN: * ~1.76x slower single-threaded (test_vmalloc.sh performance) * ~2.18x slower when both cpus are performing operations simultaneously (test_vmalloc.sh sequential_test_order=3D1) This is unfortunate but given that this is a debug feature only, not the end of the world. The full benchmark results are: Performance No KASAN KASAN original x baseline KASAN vmalloc x baseline x KASAN fix_size_alloc_test 662004 11404956 17.23 19144610 28.92 1.68 full_fit_alloc_test 710950 12029752 16.92 13184651 18.55 1.10 long_busy_list_alloc_test 9431875 43990172 4.66 82970178 8.80 1.89 random_size_alloc_test 5033626 23061762 4.58 47158834 9.37 2.04 fix_align_alloc_test 1252514 15276910 12.20 31266116 24.96 2.05 random_size_align_alloc_te 1648501 14578321 8.84 25560052 15.51 1.75 align_shift_alloc_test 147 830 5.65 5692 38.72 6.86 pcpu_alloc_test 80732 125520 1.55 140864 1.74 1.12 Total Cycles 119240774314 763211341128 6.40 1390338696894 11.66 1.82 Sequential, 2 cpus No KASAN KASAN original x baseline KASAN vmalloc x baseline x KASAN fix_size_alloc_test 1423150 14276550 10.03 27733022 19.49 1.94 full_fit_alloc_test 1754219 14722640 8.39 15030786 8.57 1.02 long_busy_list_alloc_test 11451858 52154973 4.55 107016027 9.34 2.05 random_size_alloc_test 5989020 26735276 4.46 68885923 11.50 2.58 fix_align_alloc_test 2050976 20166900 9.83 50491675 24.62 2.50 random_size_align_alloc_te 2858229 17971700 6.29 38730225 13.55 2.16 align_shift_alloc_test 405 6428 15.87 26253 64.82 4.08 pcpu_alloc_test 127183 151464 1.19 216263 1.70 1.43 Total Cycles 54181269392 308723699764 5.70 650772566394 12.01 2.11 fix_size_alloc_test 1420404 14289308 10.06 27790035 19.56 1.94 full_fit_alloc_test 1736145 14806234 8.53 15274301 8.80 1.03 long_busy_list_alloc_test 11404638 52270785 4.58 107550254 9.43 2.06 random_size_alloc_test 6017006 26650625 4.43 68696127 11.42 2.58 fix_align_alloc_test 2045504 20280985 9.91 50414862 24.65 2.49 random_size_align_alloc_te 2845338 17931018 6.30 38510276 13.53 2.15 align_shift_alloc_test 472 3760 7.97 9656 20.46 2.57 pcpu_alloc_test 118643 132732 1.12 146504 1.23 1.10 Total Cycles 54040011688 309102805492 5.72 651325675652 12.05 2.11 [dja@axtens.net: fixups] Link: http://lkml.kernel.org/r/20191120052719.7201-1-dja@axtens.net Link: https://bugzilla.kernel.org/show_bug.cgi?id=3D202009 Link: http://lkml.kernel.org/r/20191031093909.9228-2-dja@axtens.net Signed-off-by: Mark Rutland [shadow rework] Signed-off-by: Daniel Axtens Co-developed-by: Mark Rutland Acked-by: Vasily Gorbik Reviewed-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Christophe Leroy Cc: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 31 +++++++++++++++++++++++++++++++ include/linux/moduleloader.h | 2 +- include/linux/vmalloc.h | 12 ++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index cc8a03cc9674..4f404c565db1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -70,8 +70,18 @@ struct kasan_cache { int free_meta_offset; }; +/* + * These functions provide a special case to support backing module + * allocations with real shadow memory. With KASAN vmalloc, the special + * case is unnecessary, as the work is handled in the generic case. + */ +#ifndef CONFIG_KASAN_VMALLOC int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); +#else +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_free_shadow(const struct vm_struct *vm) {} +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -194,4 +204,25 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS */ +#ifdef CONFIG_KASAN_VMALLOC +int kasan_populate_vmalloc(unsigned long requested_size, + struct vm_struct *area); +void kasan_poison_vmalloc(void *start, unsigned long size); +void kasan_release_vmalloc(unsigned long start, unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end); +#else +static inline int kasan_populate_vmalloc(unsigned long requested_size, + struct vm_struct *area) +{ + return 0; +} + +static inline void kasan_poison_vmalloc(void *start, unsigned long size) {} +static inline void kasan_release_vmalloc(unsigned long start, + unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) {} +#endif + #endif /* LINUX_KASAN_H */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 5229c18025e9..ca92aea8a6bd 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -91,7 +91,7 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) #include #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b4c58a191eb1..a4b241102771 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -22,6 +22,18 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ + +/* + * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. + * + * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after + * shadow memory has been mapped. It's used to handle allocation errors so that + * we don't try to poision shadow on free if it was never allocated. + * + * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to + * determine which allocations need the module shadow freed. + */ + /* * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with * vfree_atomic(). -- cgit v1.2.3 From 5e27a2df03b8933aa7c1579816ecb6a071bb0e0d Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Sat, 30 Nov 2019 17:55:06 -0800 Subject: mm/page_alloc: add alloc_contig_pages() HugeTLB helper alloc_gigantic_page() implements fairly generic allocation method where it scans over various zones looking for a large contiguous pfn range before trying to allocate it with alloc_contig_range(). Other than deriving the requested order from 'struct hstate', there is nothing HugeTLB specific in there. This can be made available for general use to allocate contiguous memory which could not have been allocated through the buddy allocator. alloc_gigantic_page() has been split carving out actual allocation method which is then made available via new alloc_contig_pages() helper wrapped under CONFIG_CONTIG_ALLOC. All references to 'gigantic' have been replaced with more generic term 'contig'. Allocated pages here should be freed with free_contig_range() or by calling __free_page() on each allocated page. Link: http://lkml.kernel.org/r/1571300646-32240-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: David Hildenbrand Acked-by: Michal Hocko Cc: Mike Kravetz Cc: Vlastimil Babka Cc: Michal Hocko Cc: David Rientjes Cc: Andrea Arcangeli Cc: Oscar Salvador Cc: Mel Gorman Cc: Mike Rapoport Cc: Dan Williams Cc: Pavel Tatashin Cc: Matthew Wilcox Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 61f2f6ff9467..e5b817cb86e7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -612,6 +612,8 @@ static inline bool pm_suspended_storage(void) /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask); +extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, + int nid, nodemask_t *nodemask); #endif void free_contig_range(unsigned long pfn, unsigned int nr_pages); -- cgit v1.2.3 From 68265390f9aa625e2ce94ed1bcff8906db702d79 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Sat, 30 Nov 2019 17:55:15 -0800 Subject: mm, pcpu: make zone pcp updates and reset internal to the mm Memory hotplug needs to be able to reset and reinit the pcpu allocator batch and high limits but this action is internal to the VM. Move the declaration to internal.h Link: http://lkml.kernel.org/r/20191021094808.28824-4-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Borislav Petkov Cc: Matt Fleming Cc: Qian Cai Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 19a0e687878a..8b0ef04b6d15 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2207,9 +2207,6 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...); extern void setup_per_cpu_pageset(void); -extern void zone_pcp_update(struct zone *zone); -extern void zone_pcp_reset(struct zone *zone); - /* page_alloc.c */ extern int min_free_kbytes; extern int watermark_boost_factor; -- cgit v1.2.3 From 653e003d7f37716f84c17edcad3c228497888bfc Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Sat, 30 Nov 2019 17:55:18 -0800 Subject: include/linux/mmzone.h: fix comment for ISOLATE_UNMAPPED macro Both file-backed pages and anonymous pages can be unmapped. ISOLATE_UNMAPPED is not just for file-backed pages. Link: http://lkml.kernel.org/r/20191024151621.GA20400@haolee.github.io Signed-off-by: Hao Lee Reviewed-by: Andrew Morton Cc: Vlastimil Babka Cc: Dan Williams Cc: Michal Hocko Cc: Wei Yang Cc: Mel Gorman Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b0a36d1580b6..c7fb21f19edd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -308,7 +308,7 @@ struct lruvec { #endif }; -/* Isolate unmapped file */ +/* Isolate unmapped pages */ #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) -- cgit v1.2.3 From 867e5e1de14b2b2bde324cdfeec3f3f83eb21424 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:55:34 -0800 Subject: mm: clean up and clarify lruvec lookup procedure There is a per-memcg lruvec and a NUMA node lruvec. Which one is being used is somewhat confusing right now, and it's easy to make mistakes - especially when it comes to global reclaim. How it works: when memory cgroups are enabled, we always use the root_mem_cgroup's per-node lruvecs. When memory cgroups are not compiled in or disabled at runtime, we use pgdat->lruvec. Document that in a comment. Due to the way the reclaim code is generalized, all lookups use the mem_cgroup_lruvec() helper function, and nobody should have to find the right lruvec manually right now. But to avoid future mistakes, rename the pgdat->lruvec member to pgdat->__lruvec and delete the convenience wrapper that suggests it's a commonly accessed member. While in this area, swap the mem_cgroup_lruvec() argument order. The name suggests a memcg operation, yet it takes a pgdat first and a memcg second. I have to double take every time I call this. Fix that. Link: http://lkml.kernel.org/r/20191022144803.302233-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Cc: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 24 ++++++++++++------------ include/linux/mmzone.h | 15 ++++++++------- 2 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 239e752a7817..feeb2c76f568 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -385,21 +385,21 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) } /** - * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone - * @node: node of the wanted lruvec + * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec * - * Returns the lru list vector holding pages for a given @node or a given - * @memcg. This can be the node lruvec, if the memory controller is disabled. + * Returns the lru list vector holding pages for a given @memcg & + * @node combination. This can be the node lruvec, if the memory + * controller is disabled. */ -static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, - struct mem_cgroup *memcg) +static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, + struct pglist_data *pgdat) { struct mem_cgroup_per_node *mz; struct lruvec *lruvec; if (mem_cgroup_disabled()) { - lruvec = node_lruvec(pgdat); + lruvec = &pgdat->__lruvec; goto out; } @@ -718,7 +718,7 @@ static inline void __mod_lruvec_page_state(struct page *page, return; } - lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup); + lruvec = mem_cgroup_lruvec(page->mem_cgroup, pgdat); __mod_lruvec_state(lruvec, idx, val); } @@ -889,16 +889,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new) { } -static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, - struct mem_cgroup *memcg) +static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, + struct pglist_data *pgdat) { - return node_lruvec(pgdat); + return &pgdat->__lruvec; } static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat) { - return &pgdat->lruvec; + return &pgdat->__lruvec; } static inline bool mm_match_cgroup(struct mm_struct *mm, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c7fb21f19edd..cc8232a100bd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -777,7 +777,13 @@ typedef struct pglist_data { #endif /* Fields commonly accessed by the page reclaim scanner */ - struct lruvec lruvec; + + /* + * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED. + * + * Use mem_cgroup_lruvec() to look up lruvecs. + */ + struct lruvec __lruvec; unsigned long flags; @@ -800,11 +806,6 @@ typedef struct pglist_data { #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) -static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) -{ - return &pgdat->lruvec; -} - static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) { return pgdat->node_start_pfn + pgdat->node_spanned_pages; @@ -842,7 +843,7 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #ifdef CONFIG_MEMCG return lruvec->pgdat; #else - return container_of(lruvec, struct pglist_data, lruvec); + return container_of(lruvec, struct pglist_data, __lruvec); #endif } -- cgit v1.2.3 From 1b05117df78e035afb5f66ef50bf8750d976ef08 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:55:52 -0800 Subject: mm: vmscan: harmonize writeback congestion tracking for nodes & memcgs The current writeback congestion tracking has separate flags for kswapd reclaim (node level) and cgroup limit reclaim (memcg-node level). This is unnecessarily complicated: the lruvec is an existing abstraction layer for that node-memcg intersection. Introduce lruvec->flags and LRUVEC_CONGESTED. Then track that at the reclaim root level, which is either the NUMA node for global reclaim, or the cgroup-node intersection for cgroup reclaim. Link: http://lkml.kernel.org/r/20191022144803.302233-9-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Roman Gushchin Reviewed-by: Shakeel Butt Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 +++--- include/linux/mmzone.h | 11 ++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index feeb2c76f568..5b86287fa069 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -132,9 +132,6 @@ struct mem_cgroup_per_node { unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; - bool congested; /* memcg has many dirty pages */ - /* backed by a congested BDI */ - struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */ }; @@ -403,6 +400,9 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, goto out; } + if (!memcg) + memcg = root_mem_cgroup; + mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); lruvec = &mz->lruvec; out: diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cc8232a100bd..ddee00e91a22 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -296,6 +296,12 @@ struct zone_reclaim_stat { unsigned long recent_scanned[2]; }; +enum lruvec_flags { + LRUVEC_CONGESTED, /* lruvec has many dirty pages + * backed by a congested BDI + */ +}; + struct lruvec { struct list_head lists[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; @@ -303,6 +309,8 @@ struct lruvec { atomic_long_t inactive_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults; + /* Various lruvec state flags (enum lruvec_flags) */ + unsigned long flags; #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif @@ -572,9 +580,6 @@ struct zone { } ____cacheline_internodealigned_in_smp; enum pgdat_flags { - PGDAT_CONGESTED, /* pgdat has many dirty pages backed by - * a congested BDI - */ PGDAT_DIRTY, /* reclaim scanning has recently found * many dirty file pages at the tail * of the LRU. -- cgit v1.2.3 From b910718a948a9120d90faf632b33ed23c70e266a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:55:59 -0800 Subject: mm: vmscan: detect file thrashing at the reclaim root We use refault information to determine whether the cache workingset is stable or transitioning, and dynamically adjust the inactive:active file LRU ratio so as to maximize protection from one-off cache during stable periods, and minimize IO during transitions. With cgroups and their nested LRU lists, we currently don't do this correctly. While recursive cgroup reclaim establishes a relative LRU order among the pages of all involved cgroups, refaults only affect the local LRU order in the cgroup in which they are occuring. As a result, cache transitions can take longer in a cgrouped system as the active pages of sibling cgroups aren't challenged when they should be. [ Right now, this is somewhat theoretical, because the siblings, under continued regular reclaim pressure, should eventually run out of inactive pages - and since inactive:active *size* balancing is also done on a cgroup-local level, we will challenge the active pages eventually in most cases. But the next patch will move that relative size enforcement to the reclaim root as well, and then this patch here will be necessary to propagate refault pressure to siblings. ] This patch moves refault detection to the root of reclaim. Instead of remembering the cgroup owner of an evicted page, remember the cgroup that caused the reclaim to happen. When refaults later occur, they'll correctly influence the cross-cgroup LRU order that reclaim follows. I.e. if global reclaim kicked out pages in some subgroup A/B/C, the refault of those pages will challenge the global LRU order, and not just the local order down inside C. [hannes@cmpxchg.org: use page_memcg() instead of another lookup] Link: http://lkml.kernel.org/r/20191115160722.GA309754@cmpxchg.org Link: http://lkml.kernel.org/r/20191107205334.158354-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Suren Baghdasaryan Cc: Andrey Ryabinin Cc: Michal Hocko Cc: Rik van Riel Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 +++++ include/linux/swap.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5b86287fa069..a7a0a1a5c8d5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -901,6 +901,11 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, return &pgdat->__lruvec; } +static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) +{ + return NULL; +} + static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) { diff --git a/include/linux/swap.h b/include/linux/swap.h index 063c0c1e112b..1e99f7ac1d7e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -307,7 +307,7 @@ struct vma_swap_readahead { }; /* linux/mm/workingset.c */ -void *workingset_eviction(struct page *page); +void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); void workingset_activation(struct page *page); -- cgit v1.2.3 From b91ac374346ba206cfd568bb0ab830af6b205cfd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:56:02 -0800 Subject: mm: vmscan: enforce inactive:active ratio at the reclaim root We split the LRU lists into inactive and an active parts to maximize workingset protection while allowing just enough inactive cache space to faciltate readahead and writeback for one-off file accesses (e.g. a linear scan through a file, or logging); or just enough inactive anon to maintain recent reference information when reclaim needs to swap. With cgroups and their nested LRU lists, we currently don't do this correctly. While recursive cgroup reclaim establishes a relative LRU order among the pages of all involved cgroups, inactive:active size decisions are done on a per-cgroup level. As a result, we'll reclaim a cgroup's workingset when it doesn't have cold pages, even when one of its siblings has plenty of it that should be reclaimed first. For example: workload A has 50M worth of hot cache but doesn't do any one-off file accesses; meanwhile, parallel workload B scans files and rarely accesses the same page twice. If these workloads were to run in an uncgrouped system, A would be protected from the high rate of cache faults from B. But if they were put in parallel cgroups for memory accounting purposes, B's fast cache fault rate would push out the hot cache pages of A. This is unexpected and undesirable - the "scan resistance" of the page cache is broken. This patch moves inactive:active size balancing decisions to the root of reclaim - the same level where the LRU order is established. It does this by looking at the recursive size of the inactive and the active file sets of the cgroup subtree at the beginning of the reclaim cycle, and then making a decision - scan or skip active pages - that applies throughout the entire run and to every cgroup involved. With that in place, in the test above, the VM will recognize that there are plenty of inactive pages in the combined cache set of workloads A and B and prefer the one-off cache in B over the hot pages in A. The scan resistance of the cache is restored. [cai@lca.pw: fix some -Wenum-conversion warnings] Link: http://lkml.kernel.org/r/1573848697-29262-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/20191107205334.158354-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Cc: Andrey Ryabinin Cc: Rik van Riel Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ddee00e91a22..d9e62b0b584e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -273,12 +273,12 @@ enum lru_list { #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) -static inline int is_file_lru(enum lru_list lru) +static inline bool is_file_lru(enum lru_list lru) { return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE); } -static inline int is_active_lru(enum lru_list lru) +static inline bool is_active_lru(enum lru_list lru) { return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } -- cgit v1.2.3 From 0ac398b171aacd0f0c132d989ec4efb5de94f34a Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Sat, 30 Nov 2019 17:56:27 -0800 Subject: mm: support memblock alloc on the exact node for sparse_buffer_init() sparse_buffer_init() use memblock_alloc_try_nid_raw() to allocate memory for page management structure, if memory allocation fails from specified node, it will fall back to allocate from other nodes. Normally, the page management structure will not exceed 2% of the total memory, but a large continuous block of allocation is needed. In most cases, memory allocation from the specified node will succeed, but a node memory become highly fragmented will fail. we expect to allocate memory base section rather than by allocating a large block of memory from other NUMA nodes Add memblock_alloc_exact_nid_raw() for this situation, which allocate boot memory block on the exact node. If a large contiguous block memory allocate fail in sparse_buffer_init(), it will fall back to allocate small block memory base section. Link: http://lkml.kernel.org/r/66755ea7-ab10-8882-36fd-3e02b03775d5@huawei.com Signed-off-by: Yunfeng Ye Reviewed-by: Mike Rapoport Cc: Wei Yang Cc: Oscar Salvador Cc: Dan Williams Cc: David Hildenbrand Cc: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f491690d54c6..b38bbefabfab 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -358,6 +358,9 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE); } +void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -- cgit v1.2.3 From 552546366a30d88bd1d6f5efe848b2ab50fd57e5 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sat, 30 Nov 2019 17:56:30 -0800 Subject: hugetlbfs: hugetlb_fault_mutex_hash() cleanup A new clang diagnostic (-Wsizeof-array-div) warns about the calculation to determine the number of u32's in an array of unsigned longs. Suppress warning by adding parentheses. While looking at the above issue, noticed that the 'address' parameter to hugetlb_fault_mutex_hash is no longer used. So, remove it from the definition and all callers. No functional change. Link: http://lkml.kernel.org/r/20190919011847.18400-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Reviewed-by: Davidlohr Bueso Reviewed-by: Andrew Morton Cc: Nick Desaulniers Cc: Ilie Halip Cc: David Bolvansky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 53fc34f930d0..d3814bd686ba 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -106,7 +106,7 @@ void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, - pgoff_t idx, unsigned long address); + pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); -- cgit v1.2.3 From 1f9dccb25b8fb48778149a002bb25d4ac2899633 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sat, 30 Nov 2019 17:56:40 -0800 Subject: hugetlbfs: convert macros to static inline, fix sparse warning huge_pte_offset() produced a sparse warning due to an improper return type when the kernel was built with !CONFIG_HUGETLB_PAGE. Fix the bad type and also convert all the macros in this block to static inline wrappers. Two existing wrappers in this block had lines in excess of 80 columns so clean those up as well. No functional change. Link: http://lkml.kernel.org/r/20191112194558.139389-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Ben Dooks Suggested-by: Jason Gunthorpe Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 137 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 115 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d3814bd686ba..159d2012cdb1 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -164,38 +164,130 @@ static inline void adjust_range_if_pmd_sharing_possible( { } -#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) -#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) -#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) +static inline long follow_hugetlb_page(struct mm_struct *mm, + struct vm_area_struct *vma, struct page **pages, + struct vm_area_struct **vmas, unsigned long *position, + unsigned long *nr_pages, long i, unsigned int flags, + int *nonblocking) +{ + BUG(); + return 0; +} + +static inline struct page *follow_huge_addr(struct mm_struct *mm, + unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +static inline int copy_hugetlb_page_range(struct mm_struct *dst, + struct mm_struct *src, struct vm_area_struct *vma) +{ + BUG(); + return 0; +} + static inline void hugetlb_report_meminfo(struct seq_file *m) { } -#define hugetlb_report_node_meminfo(n, buf) 0 + +static inline int hugetlb_report_node_meminfo(int nid, char *buf) +{ + return 0; +} + static inline void hugetlb_show_meminfo(void) { } -#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL -#define follow_huge_pmd(mm, addr, pmd, flags) NULL -#define follow_huge_pud(mm, addr, pud, flags) NULL -#define follow_huge_pgd(mm, addr, pgd, flags) NULL -#define prepare_hugepage_range(file, addr, len) (-EINVAL) -#define pmd_huge(x) 0 -#define pud_huge(x) 0 -#define is_hugepage_only_range(mm, addr, len) 0 -#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) -#define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ - src_addr, pagep) ({ BUG(); 0; }) -#define huge_pte_offset(mm, address, sz) 0 + +static inline struct page *follow_huge_pd(struct vm_area_struct *vma, + unsigned long address, hugepd_t hpd, int flags, + int pdshift) +{ + return NULL; +} + +static inline struct page *follow_huge_pmd(struct mm_struct *mm, + unsigned long address, pmd_t *pmd, int flags) +{ + return NULL; +} + +static inline struct page *follow_huge_pud(struct mm_struct *mm, + unsigned long address, pud_t *pud, int flags) +{ + return NULL; +} + +static inline struct page *follow_huge_pgd(struct mm_struct *mm, + unsigned long address, pgd_t *pgd, int flags) +{ + return NULL; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + return -EINVAL; +} + +static inline int pmd_huge(pmd_t pmd) +{ + return 0; +} + +static inline int pud_huge(pud_t pud) +{ + return 0; +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + BUG(); +} + +static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, + pte_t *dst_pte, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + struct page **pagep) +{ + BUG(); + return 0; +} + +static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + return NULL; +} static inline bool isolate_huge_page(struct page *page, struct list_head *list) { return false; } -#define putback_active_hugepage(p) do {} while (0) -#define move_hugetlb_state(old, new, reason) do {} while (0) -static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, - unsigned long address, unsigned long end, pgprot_t newprot) +static inline void putback_active_hugepage(struct page *page) +{ +} + +static inline void move_hugetlb_state(struct page *oldpage, + struct page *newpage, int reason) +{ +} + +static inline unsigned long hugetlb_change_protection( + struct vm_area_struct *vma, unsigned long address, + unsigned long end, pgprot_t newprot) { return 0; } @@ -213,9 +305,10 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, { BUG(); } + static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, - unsigned int flags) + struct vm_area_struct *vma, unsigned long address, + unsigned int flags) { BUG(); return 0; -- cgit v1.2.3 From 188b04a7d93860fd100b2671600b8ad81fb0a842 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 30 Nov 2019 17:57:02 -0800 Subject: hugetlb: remove unused hstate in hugetlb_fault_mutex_hash() The first parameter hstate in function hugetlb_fault_mutex_hash() is not used anymore. This patch removes it. [akpm@linux-foundation.org: various build fixes] [cai@lca.pw: fix a GCC compilation warning] Link: http://lkml.kernel.org/r/1570544108-32331-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/20191005003302.785-1-richardw.yang@linux.intel.com Signed-off-by: Wei Yang Signed-off-by: Qian Cai Suggested-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Mike Kravetz Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 159d2012cdb1..31d4920994b9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -105,8 +105,7 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; -u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, - pgoff_t idx); +u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); -- cgit v1.2.3 From 84218b552e0a591ac706a926d5e1e8eaf0d5a03a Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Sat, 30 Nov 2019 17:58:14 -0800 Subject: mm: fix struct member name in function comments The member in struct zonelist is _zonerefs instead of zones. Link: http://lkml.kernel.org/r/20190927144049.GA29622@haolee.github.io Signed-off-by: Hao Lee Reviewed-by: Andrew Morton Reviewed-by: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d9e62b0b584e..89d8ff06c9ce 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1085,7 +1085,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, /** * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask * @zone - The current zone in the iterator - * @z - The current pointer within zonelist->zones being iterated + * @z - The current pointer within zonelist->_zonerefs being iterated * @zlist - The zonelist being iterated * @highidx - The zone index of the highest zone to return * @nodemask - Nodemask allowed by the allocator -- cgit v1.2.3 From 12cc1c7345b6bf34c45ccaa75393e2d6eb707d7b Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Sat, 30 Nov 2019 17:58:20 -0800 Subject: mm/memory_hotplug.c: remove __online_page_set_limits() __online_page_set_limits() is a dummy function - remove it and all callers. Link: http://lkml.kernel.org/r/8e1bc9d3b492f6bde16e95ebc1dee11d6aefabd7.1567889743.git.jrdr.linux@gmail.com Link: http://lkml.kernel.org/r/854db2cf8145d9635249c95584d9a91fd774a229.1567889743.git.jrdr.linux@gmail.com Link: http://lkml.kernel.org/r/9afe6c5a18158f3884a6b302ac2c772f3da49ccc.1567889743.git.jrdr.linux@gmail.com Signed-off-by: Souptick Joarder Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Cc: Juergen Gross Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 101d97e7e2ac..3a08ecdfca11 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -106,8 +106,6 @@ extern void generic_online_page(struct page *page, unsigned int order); extern int set_online_page_callback(online_page_callback_t callback); extern int restore_online_page_callback(online_page_callback_t callback); -extern void __online_page_set_limits(struct page *page); - extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, -- cgit v1.2.3 From 040b5cfbcefa263ccf2c118c4938308606bb7ed8 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 2 Dec 2019 10:49:51 +0530 Subject: Fixed updating of ethertype in function skb_mpls_pop The skb_mpls_pop was not updating ethertype of an ethernet packet if the packet was originally received from a non ARPHRD_ETHER device. In the below OVS data path flow, since the device corresponding to port 7 is an l3 device (ARPHRD_NONE) the skb_mpls_pop function does not update the ethertype of the packet even though the previous push_eth action had added an ethernet header to the packet. recirc_id(0),in_port(7),eth_type(0x8847), mpls(label=12/0xfffff,tc=0/0,ttl=0/0x0,bos=1/1), actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00), pop_mpls(eth_type=0x800),4 Fixes: ed246cee09b9 ("net: core: move pop MPLS functionality from OvS to core helper") Signed-off-by: Martin Varghese Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7af5bec7d3b0..5aea72fe8498 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3530,7 +3530,8 @@ int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len); -int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len); +int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, + bool ethernet); int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); int skb_mpls_dec_ttl(struct sk_buff *skb); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, -- cgit v1.2.3 From 9e0afe3910ff7e5493c5d8ebe3b499994b5e0272 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Dec 2019 11:20:37 +0100 Subject: firmware: dmi: Remember the memory type Store the memory type while walking the memory slots, and provide a way to retrieve it later. Signed-off-by: Jean Delvare --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 8de8c4f15163..13a48b167e2d 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -113,6 +113,7 @@ extern int dmi_walk(void (*decode)(const struct dmi_header *, void *), extern bool dmi_match(enum dmi_field f, const char *str); extern void dmi_memdev_name(u16 handle, const char **bank, const char **device); extern u64 dmi_memdev_size(u16 handle); +extern u8 dmi_memdev_type(u16 handle); #else @@ -142,6 +143,7 @@ static inline bool dmi_match(enum dmi_field f, const char *str) static inline void dmi_memdev_name(u16 handle, const char **bank, const char **device) { } static inline u64 dmi_memdev_size(u16 handle) { return ~0ul; } +static inline u8 dmi_memdev_type(u16 handle) { return 0x0; } static inline const struct dmi_system_id * dmi_first_match(const struct dmi_system_id *list) { return NULL; } -- cgit v1.2.3 From 7c2378800cf7ac87e2663afa7f39d102871f0c28 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 3 Dec 2019 11:20:37 +0100 Subject: firmware: dmi: Add dmi_memdev_handle Add a utility function dmi_memdev_handle() which returns the DMI handle associated with a given memory slot. This will allow kernel drivers to iterate over the memory slots. Signed-off-by: Jean Delvare --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 13a48b167e2d..927f8a8b7a1d 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -114,6 +114,7 @@ extern bool dmi_match(enum dmi_field f, const char *str); extern void dmi_memdev_name(u16 handle, const char **bank, const char **device); extern u64 dmi_memdev_size(u16 handle); extern u8 dmi_memdev_type(u16 handle); +extern u16 dmi_memdev_handle(int slot); #else @@ -144,6 +145,7 @@ static inline void dmi_memdev_name(u16 handle, const char **bank, const char **device) { } static inline u64 dmi_memdev_size(u16 handle) { return ~0ul; } static inline u8 dmi_memdev_type(u16 handle) { return 0x0; } +static inline u16 dmi_memdev_handle(int slot) { return 0xffff; } static inline const struct dmi_system_id * dmi_first_match(const struct dmi_system_id *list) { return NULL; } -- cgit v1.2.3 From 03b1230ca12a12e045d83b0357792075bf94a1e0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Dec 2019 18:50:25 -0700 Subject: io_uring: ensure async punted sendmsg/recvmsg requests copy data Just like commit f67676d160c6 for read/write requests, this one ensures that the msghdr data is fully copied if we need to punt a recvmsg or sendmsg system call to async context. Signed-off-by: Jens Axboe --- include/linux/socket.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 4bde63021c09..903507fb901f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -378,12 +378,19 @@ extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, unsigned int flags, bool forbid_cmsg_compat); -extern long __sys_sendmsg_sock(struct socket *sock, - struct user_msghdr __user *msg, +extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, unsigned int flags); -extern long __sys_recvmsg_sock(struct socket *sock, - struct user_msghdr __user *msg, +extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, + struct user_msghdr __user *umsg, + struct sockaddr __user *uaddr, unsigned int flags); +extern int sendmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct iovec **iov); +extern int recvmsg_copy_msghdr(struct msghdr *msg, + struct user_msghdr __user *umsg, unsigned flags, + struct sockaddr __user **uaddr, + struct iovec **iov); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, -- cgit v1.2.3 From f499a021ea8c9f70321fce3d674d8eca5bbeee2c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Dec 2019 16:28:46 -0700 Subject: io_uring: ensure async punted connect requests copy data Just like commit f67676d160c6 for read/write requests, this one ensures that the sockaddr data has been copied for IORING_OP_CONNECT if we need to punt the request to async context. Signed-off-by: Jens Axboe --- include/linux/socket.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 903507fb901f..2d2313403101 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -406,9 +406,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); -extern int __sys_connect_file(struct file *file, - struct sockaddr __user *uservaddr, int addrlen, - int file_flags); +extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, + int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); -- cgit v1.2.3 From 795ee49c1a28d1b3eeb2b463f18d557700fc6153 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 30 Nov 2019 23:23:52 +0300 Subject: block: optimise bvec_iter_advance() bvec_iter_advance() is quite popular, but compilers fail to do proper alias analysis and optimise it good enough. The assembly is checked for gcc 9.2, x86-64. - remove @iter->bi_size from min(...), as it's always less than @bytes. Modify at the beginning and forget about it. - the compiler isn't able to collapse memory dependencies and remove writes in the loop. Help it by explicitely using local vars. Signed-off-by: Arvind Sankar Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/bvec.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index a032f01e928c..679a42253170 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -87,26 +87,24 @@ struct bvec_iter_all { static inline bool bvec_iter_advance(const struct bio_vec *bv, struct bvec_iter *iter, unsigned bytes) { + unsigned int idx = iter->bi_idx; + if (WARN_ONCE(bytes > iter->bi_size, "Attempted to advance past end of bvec iter\n")) { iter->bi_size = 0; return false; } - while (bytes) { - const struct bio_vec *cur = bv + iter->bi_idx; - unsigned len = min3(bytes, iter->bi_size, - cur->bv_len - iter->bi_bvec_done); - - bytes -= len; - iter->bi_size -= len; - iter->bi_bvec_done += len; + iter->bi_size -= bytes; + bytes += iter->bi_bvec_done; - if (iter->bi_bvec_done == cur->bv_len) { - iter->bi_bvec_done = 0; - iter->bi_idx++; - } + while (bytes && bytes >= bv[idx].bv_len) { + bytes -= bv[idx].bv_len; + idx++; } + + iter->bi_idx = idx; + iter->bi_bvec_done = bytes; return true; } -- cgit v1.2.3 From 9b38bb4b1e6de47b379afaad2c707df639bb4dc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:04 +0100 Subject: block: simplify blkdev_nr_zones Simplify the arguments to blkdev_nr_zones by passing a gendisk instead of the block_device and capacity. This also removes the need for __blkdev_nr_zones as all callers are outside the fast path and can deal with the additional branch. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6012e2592628..c5852de402b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,8 +357,7 @@ typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); - -extern unsigned int blkdev_nr_zones(struct block_device *bdev); +unsigned int blkdev_nr_zones(struct gendisk *disk); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -371,7 +370,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blkdev_nr_zones(struct block_device *bdev) +static inline unsigned int blkdev_nr_zones(struct gendisk *disk) { return 0; } -- cgit v1.2.3 From f216fdd77b5654f8c4f6fac6020d6aabc58878ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:05 +0100 Subject: block: replace seq_zones_bitmap with conv_zones_bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invert the meaning of seq_zones_bitmap by keeping a bitmap of conventional zones. This allows not having a bitmap for devices that do not have conventional zones. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c5852de402b6..503c4d4c5884 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -503,9 +503,9 @@ struct request_queue { /* * Zoned block device information for request dispatch control. * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit clear) or - * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones + * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit set) or + * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones * bits which indicates if a zone is write locked, that is, if a write * request targeting the zone was dispatched. All three fields are * initialized by the low level device driver (e.g. scsi/sd.c). @@ -518,7 +518,7 @@ struct request_queue { * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned long *seq_zones_bitmap; + unsigned long *conv_zones_bitmap; unsigned long *seq_zones_wlock; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -723,9 +723,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, static inline bool blk_queue_zone_is_seq(struct request_queue *q, sector_t sector) { - if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) + if (!blk_queue_is_zoned(q)) return false; - return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); + if (!q->conv_zones_bitmap) + return true; + return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); } #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) -- cgit v1.2.3 From ae58954d8734c44298f55ed71e683ea944994fab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 3 Dec 2019 10:39:07 +0100 Subject: block: don't handle bio based drivers in blk_revalidate_disk_zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bio based drivers only need to update q->nr_zones. Do that manually instead of overloading blk_revalidate_disk_zones to keep that function simpler for the next round of changes that will rely even more on the request based functionality. Reviewed-by: Javier González Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 503c4d4c5884..47eb22a3b7f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,11 +375,6 @@ static inline unsigned int blkdev_nr_zones(struct gendisk *disk) return 0; } -static inline int blk_revalidate_disk_zones(struct gendisk *disk) -{ - return 0; -} - static inline int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) -- cgit v1.2.3 From 0f109f0e9a608c381846b3f2270a6a7b72158cb4 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 21 Nov 2019 08:14:42 +0000 Subject: agp: move AGPGART_MINOR to include/linux/miscdevice.h This patch move the define for AGPGART_MINOR to include/linux/miscdevice.h. It is better that all minor number definitions are in the same place. Signed-off-by: Corentin Labbe Acked-by: Arnd Bergmann Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/1574324085-4338-3-git-send-email-clabbe@baylibre.com --- include/linux/agpgart.h | 2 -- include/linux/miscdevice.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h index c6b61ca97053..21b34a96cfd8 100644 --- a/include/linux/agpgart.h +++ b/include/linux/agpgart.h @@ -30,8 +30,6 @@ #include #include -#define AGPGART_MINOR 175 - struct agp_info { struct agp_version version; /* version of the driver */ u32 bridge_id; /* bridge vendor/device */ diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 3247a3dc7934..6f2ca42152a0 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -33,6 +33,7 @@ #define SGI_MMTIMER 153 #define STORE_QUEUE_MINOR 155 /* unused */ #define I2O_MINOR 166 +#define AGPGART_MINOR 175 #define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 #define IRNET_MINOR 187 -- cgit v1.2.3 From d04ac224b1688f005a84f764cfe29844f8e9da08 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Thu, 5 Dec 2019 05:57:22 +0530 Subject: net: Fixed updating of ethertype in skb_mpls_push() The skb_mpls_push was not updating ethertype of an ethernet packet if the packet was originally received from a non ARPHRD_ETHER device. In the below OVS data path flow, since the device corresponding to port 7 is an l3 device (ARPHRD_NONE) the skb_mpls_push function does not update the ethertype of the packet even though the previous push_eth action had added an ethernet header to the packet. recirc_id(0),in_port(7),eth_type(0x0800),ipv4(tos=0/0xfc,ttl=64,frag=no), actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00), push_mpls(label=13,tc=0,ttl=64,bos=1,eth_type=0x8847),4 Fixes: 8822e270d697 ("net: core: move push MPLS functionality from OvS to core helper") Signed-off-by: Martin Varghese Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5aea72fe8498..e9133bcf0544 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3529,7 +3529,7 @@ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, - int mac_len); + int mac_len, bool ethernet); int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet); int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); -- cgit v1.2.3 From 9d7ea9a297e6445d567056f15b469dde13ca4134 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 4 Dec 2019 16:49:50 -0800 Subject: mm/vmstat: add helpers to get vmstat item names for each enum type Statistics in vmstat is combined from counters with different structure, but names for them are merged into one array. This patch adds trivial helpers to get name for each item: const char *zone_stat_name(enum zone_stat_item item); const char *numa_stat_name(enum numa_stat_item item); const char *node_stat_name(enum node_stat_item item); const char *writeback_stat_name(enum writeback_stat_item item); const char *vm_event_name(enum vm_event_item item); Names for enum writeback_stat_item are folded in the middle of vmstat_text so this patch moves declaration into header to calculate offset of following items. Also this patch reuses piece of node stat names for lru list names: const char *lru_list_name(enum lru_list lru); This returns common lru list names: "inactive_anon", "active_anon", "inactive_file", "active_file", "unevictable". [khlebnikov@yandex-team.ru: do not use size of vmstat_text as count of /proc/vmstat items] Link: http://lkml.kernel.org/r/157152151769.4139.15423465513138349343.stgit@buzz Link: https://lore.kernel.org/linux-mm/cd1c42ae-281f-c8a8-70ac-1d01d417b2e1@infradead.org/T/#u Link: http://lkml.kernel.org/r/157113012325.453.562783073839432766.stgit@buzz Signed-off-by: Konstantin Khlebnikov Reviewed-by: Andrew Morton Cc: Randy Dunlap Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Cc: YueHaibing Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index bdeda4b079fe..b995d8b680c2 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -31,6 +31,12 @@ struct reclaim_stat { unsigned nr_unmap_fail; }; +enum writeback_stat_item { + NR_DIRTY_THRESHOLD, + NR_DIRTY_BG_THRESHOLD, + NR_VM_WRITEBACK_STAT_ITEMS, +}; + #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. @@ -381,4 +387,48 @@ static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, extern const char * const vmstat_text[]; +static inline const char *zone_stat_name(enum zone_stat_item item) +{ + return vmstat_text[item]; +} + +#ifdef CONFIG_NUMA +static inline const char *numa_stat_name(enum numa_stat_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + item]; +} +#endif /* CONFIG_NUMA */ + +static inline const char *node_stat_name(enum node_stat_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS + + item]; +} + +static inline const char *lru_list_name(enum lru_list lru) +{ + return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" +} + +static inline const char *writeback_stat_name(enum writeback_stat_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS + + NR_VM_NODE_STAT_ITEMS + + item]; +} + +#ifdef CONFIG_VM_EVENT_COUNTERS +static inline const char *vm_event_name(enum vm_event_item item) +{ + return vmstat_text[NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS + + NR_VM_NODE_STAT_ITEMS + + NR_VM_WRITEBACK_STAT_ITEMS + + item]; +} +#endif /* CONFIG_VM_EVENT_COUNTERS */ + #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From ebc5d83d04438116c24dcc556b0ab6c8ef64b77e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 4 Dec 2019 16:49:53 -0800 Subject: mm/memcontrol: use vmstat names for printing statistics Use common names from vmstat array when possible. This gives not much difference in code size for now, but should help in keeping interfaces consistent. add/remove: 0/2 grow/shrink: 2/0 up/down: 70/-72 (-2) Function old new delta memory_stat_format 984 1050 +66 memcg_stat_show 957 961 +4 memcg1_event_names 32 - -32 mem_cgroup_lru_names 40 - -40 Total: Before=14485337, After=14485335, chg -0.00% Link: http://lkml.kernel.org/r/157113012508.453.80391533767219371.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Andrew Morton Cc: Michal Hocko Cc: Vladimir Davydov Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index b995d8b680c2..292485f3d24d 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -420,7 +420,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item) item]; } -#ifdef CONFIG_VM_EVENT_COUNTERS +#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + @@ -429,6 +429,6 @@ static inline const char *vm_event_name(enum vm_event_item item) NR_VM_WRITEBACK_STAT_ITEMS + item]; } -#endif /* CONFIG_VM_EVENT_COUNTERS */ +#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From 9573e8f70a82bcbac95b1ea222ac9d5e50266f9f Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 4 Dec 2019 16:50:08 -0800 Subject: include/linux/proc_fs.h: fix confusing macro arg name state_size and ops are in the wrong position. Link: http://lkml.kernel.org/r/20190910021747.11216-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: Andrew Morton Acked-by: Aleksa Sarai Reviewed-by: Christian Brauner Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index a705aa2d03f9..0640be56dcbd 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -58,8 +58,8 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *); struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct seq_operations *ops, unsigned int state_size, void *data); -#define proc_create_net(name, mode, parent, state_size, ops) \ - proc_create_net_data(name, mode, parent, state_size, ops, NULL) +#define proc_create_net(name, mode, parent, ops, state_size) \ + proc_create_net_data(name, mode, parent, ops, state_size, NULL) struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode, struct proc_dir_entry *parent, int (*show)(struct seq_file *, void *), void *data); -- cgit v1.2.3 From d5ffb71b633cd5c4b8cce633c9d6448dced4eb74 Mon Sep 17 00:00:00 2001 From: Alessio Balsini Date: Wed, 4 Dec 2019 16:50:14 -0800 Subject: include/linux/sysctl.h: inline braces for ctl_table and ctl_table_header Fix coding style of "struct ctl_table" and "struct ctl_table_header" to have inline braces. Besides the wide use of this proposed cose style, this change helps to find at a glance the struct definition when navigating the code. Link: http://lkml.kernel.org/r/20190903154906.188651-1-balsini@android.com Signed-off-by: Alessio Balsini Acked-by: Luis Chamberlain Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6df477329b76..02fa84493f23 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -120,8 +120,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name) /* A sysctl table is an array of struct ctl_table: */ -struct ctl_table -{ +struct ctl_table { const char *procname; /* Text ID for /proc/sys, or zero */ void *data; int maxlen; @@ -140,8 +139,7 @@ struct ctl_node { /* struct ctl_table_header is used to maintain dynamic lists of struct ctl_table trees. */ -struct ctl_table_header -{ +struct ctl_table_header { union { struct { struct ctl_table *ctl_table; -- cgit v1.2.3 From 8788994376d84d627450fd0d67deb6a66ddf07d7 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Wed, 4 Dec 2019 16:50:20 -0800 Subject: linux/build_bug.h: change type to int Having BUILD_BUG_ON_ZERO produce a value of type size_t leads to awkward casts in cases where the result needs to be signed, or of smaller type than size_t. To avoid this, cast the value to int instead and rely on implicit type conversions when a larger or unsigned type is needed. Link: http://lkml.kernel.org/r/20190811184938.1796-3-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Suggested-by: Masahiro Yamada Reviewed-by: Kees Cook Reviewed-by: Masahiro Yamada Cc: Joe Perches Cc: Johannes Berg Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 0fe5426f2bdc..e3a0be2c90ad 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -9,11 +9,11 @@ #else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a - * result (of value 0 and type size_t), so the expression can be used + * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). */ -#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); })) +#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) #endif /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ -- cgit v1.2.3 From 885e68e8b7b1328aa1e28b27e13fbfb5f020d269 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:50:32 -0800 Subject: kernel.h: update comment about simple_strto() functions There were discussions in the past about use cases for simple_strto() functions and, in some rare cases, they have a benefit over kstrto() ones. Update a comment to reduce confusion about special use cases. Link: http://lkml.kernel.org/r/20190801192904.41087-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Suggested-by: Miguel Ojeda Cc: Geert Uytterhoeven Cc: Mans Rullgard Cc: Petr Mladek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 09f759228e3f..3adcb39fa6f5 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -348,8 +348,7 @@ int __must_check kstrtoll(const char *s, unsigned int base, long long *res); * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Used as a replacement for the simple_strtoull. Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { @@ -377,8 +376,7 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. - * Used as a replacement for the obsolete simple_strtoull. Return code must - * be checked. + * Used as a replacement for the simple_strtoull. Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { @@ -454,7 +452,18 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t return kstrtoint_from_user(s, count, base, res); } -/* Obsolete, do not use. Use kstrto instead */ +/* + * Use kstrto instead. + * + * NOTE: simple_strto does not check for the range overflow and, + * depending on the input, may give interesting results. + * + * Use these functions if and only if you cannot use kstrto, because + * the conversion ends on the first non-digit character, which may be far + * beyond the supported range. It might be useful to parse the strings like + * 10x50 or 12:21 without altering original string or temporary buffer in use. + * Keep in mind above caveat. + */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern long simple_strtol(const char *,char **,unsigned int); -- cgit v1.2.3 From 260a2679e5cbfb3d8a4cf6cd1cb6f57e89c7e543 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Wed, 4 Dec 2019 16:50:47 -0800 Subject: kernel/notifier.c: remove blocking_notifier_chain_cond_register() blocking_notifier_chain_cond_register() does not consider system_booting state, which is the only difference between this function and blocking_notifier_cain_register(). This can be a bug and is a piece of duplicate code. Delete blocking_notifier_chain_cond_register() Link: http://lkml.kernel.org/r/1568861888-34045-4-git-send-email-nixiaoming@huawei.com Signed-off-by: Xiaoming Ni Reviewed-by: Andrew Morton Cc: Alan Stern Cc: Alexey Dobriyan Cc: Andy Lutomirski Cc: Anna Schumaker Cc: Arjan van de Ven Cc: Chuck Lever Cc: David S. Miller Cc: Ingo Molnar Cc: J. Bruce Fields Cc: Jeff Layton Cc: Nadia Derbey Cc: "Paul E. McKenney" Cc: Sam Protsenko Cc: Thomas Gleixner Cc: Trond Myklebust Cc: Vasily Averin Cc: Viresh Kumar Cc: YueHaibing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/notifier.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 0096a05395e3..018947611483 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -150,10 +150,6 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh, extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); -extern int blocking_notifier_chain_cond_register( - struct blocking_notifier_head *nh, - struct notifier_block *nb); - extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, -- cgit v1.2.3 From 169c474fb22d8a5e909e172f177b957546d0519d Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 4 Dec 2019 16:50:57 -0800 Subject: bitops: introduce the for_each_set_clump8 macro Pach series "Introduce the for_each_set_clump8 macro", v18. While adding GPIO get_multiple/set_multiple callback support for various drivers, I noticed a pattern of looping manifesting that would be useful standardized as a macro. This patchset introduces the for_each_set_clump8 macro and utilizes it in several GPIO drivers. The for_each_set_clump macro8 facilitates a for-loop syntax that iterates over a memory region entire groups of set bits at a time. For example, suppose you would like to iterate over a 32-bit integer 8 bits at a time, skipping over 8-bit groups with no set bit, where XXXXXXXX represents the current 8-bit group: Example: 10111110 00000000 11111111 00110011 First loop: 10111110 00000000 11111111 XXXXXXXX Second loop: 10111110 00000000 XXXXXXXX 00110011 Third loop: XXXXXXXX 00000000 11111111 00110011 Each iteration of the loop returns the next 8-bit group that has at least one set bit. The for_each_set_clump8 macro has four parameters: * start: set to the bit offset of the current clump * clump: set to the current clump value * bits: bitmap to search within * size: bitmap size in number of bits In this version of the patchset, the for_each_set_clump macro has been reimplemented and simplified based on the suggestions provided by Rasmus Villemoes and Andy Shevchenko in the version 4 submission. In particular, the function of the for_each_set_clump macro has been restricted to handle only 8-bit clumps; the drivers that use the for_each_set_clump macro only handle 8-bit ports so a generic for_each_set_clump implementation is not necessary. Thus, a solution for large clumps (i.e. those larger than the width of a bitmap word) can be postponed until a driver appears that actually requires such a generic for_each_set_clump implementation. For what it's worth, a semi-generic for_each_set_clump (i.e. for clumps smaller than the width of a bitmap word) can be implemented by simply replacing the hardcoded '8' and '0xFF' instances with respective variables. I have not yet had a need for such an implementation, and since it falls short of a true generic for_each_set_clump function, I have decided to forgo such an implementation for now. In addition, the bitmap_get_value8 and bitmap_set_value8 functions are introduced to get and set 8-bit values respectively. Their use is based on the behavior suggested in the patchset version 4 review. This patch (of 14): This macro iterates for each 8-bit group of bits (clump) with set bits, within a bitmap memory region. For each iteration, "start" is set to the bit offset of the found clump, while the respective clump value is stored to the location pointed by "clump". Additionally, the bitmap_get_value8 and bitmap_set_value8 functions are introduced to respectively get and set an 8-bit value in a bitmap memory region. [gustavo@embeddedor.com: fix potential sign-extension overflow] Link: http://lkml.kernel.org/r/20191015184657.GA26541@embeddedor [akpm@linux-foundation.org: s/ULL/UL/, per Joe] [vilhelm.gray@gmail.com: add for_each_set_clump8 documentation] Link: http://lkml.kernel.org/r/20191016161825.301082-1-vilhelm.gray@gmail.com Link: http://lkml.kernel.org/r/893c3b4f03266c9496137cc98ac2b1bd27f92c73.1570641097.git.vilhelm.gray@gmail.com Signed-off-by: William Breathitt Gray Signed-off-by: Gustavo A. R. Silva Suggested-by: Andy Shevchenko Suggested-by: Rasmus Villemoes Suggested-by: Lukas Wunner Tested-by: Andy Shevchenko Cc: Arnd Bergmann Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Masahiro Yamada Cc: Geert Uytterhoeven Cc: Phil Reid Cc: Geert Uytterhoeven Cc: Mathias Duckeck Cc: Morten Hein Tiljeset Cc: Sean Nyekjaer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 35 +++++++++++++++++++++++++++++++++++ include/linux/bitops.h | 12 ++++++++++++ 2 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 29fc933df3bf..9f046609e809 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -66,6 +66,8 @@ * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst + * bitmap_get_value8(map, start) Get 8bit value from map at start + * bitmap_set_value8(map, value, start) Set 8bit value to map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long @@ -489,6 +491,39 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) dst[1] = mask >> 32; } +/** + * bitmap_get_value8 - get an 8-bit value within a memory region + * @map: address to the bitmap memory region + * @start: bit offset of the 8-bit value; must be a multiple of 8 + * + * Returns the 8-bit value located at the @start bit offset within the @src + * memory region. + */ +static inline unsigned long bitmap_get_value8(const unsigned long *map, + unsigned long start) +{ + const size_t index = BIT_WORD(start); + const unsigned long offset = start % BITS_PER_LONG; + + return (map[index] >> offset) & 0xFF; +} + +/** + * bitmap_set_value8 - set an 8-bit value within a memory region + * @map: address to the bitmap memory region + * @value: the 8-bit value; values wider than 8 bits may clobber bitmap + * @start: bit offset of the 8-bit value; must be a multiple of 8 + */ +static inline void bitmap_set_value8(unsigned long *map, unsigned long value, + unsigned long start) +{ + const size_t index = BIT_WORD(start); + const unsigned long offset = start % BITS_PER_LONG; + + map[index] &= ~(0xFFUL << offset); + map[index] |= value << offset; +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index c94a9ff9f082..e479067c202c 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,6 +47,18 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) +/** + * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits + * @start: bit offset to start search and to store the current iteration offset + * @clump: location to store copy of current 8-bit clump + * @bits: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_clump8(start, clump, bits, size) \ + for ((start) = find_first_clump8(&(clump), (bits), (size)); \ + (start) < (size); \ + (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) + static inline int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3 From 11d43e62f693c66c8c76c2ea2349e0f3c5764964 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 4 Dec 2019 16:51:47 -0800 Subject: lib/rbtree: set successor's parent unconditionally Both in Case 2 and 3, we exchange n and s. This mean no matter whether child2 is NULL or not, successor's parent should be assigned to node's. This patch takes this step out to make it explicit and reduce the ambiguity. Besides, this step reduces some symbol size like rb_erase(). KERN_CONFIG upstream patched OPT_FOR_PERF 877 870 OPT_FOR_SIZE 635 621 Link: http://lkml.kernel.org/r/20191028021442.5450-1-richardw.yang@linux.intel.com Signed-off-by: Wei Yang Acked-by: Peter Zijlstra (Intel) Reviewed-by: Michel Lespinasse Reviewed-by: Davidlohr Bueso Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index fdd421b8d9ae..99c42e1a74b8 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -283,14 +283,13 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, __rb_change_child(node, successor, tmp, root); if (child2) { - successor->__rb_parent_color = pc; rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { unsigned long pc2 = successor->__rb_parent_color; - successor->__rb_parent_color = pc; rebalance = __rb_is_black(pc2) ? parent : NULL; } + successor->__rb_parent_color = pc; tmp = successor; } -- cgit v1.2.3 From 8b7569a224a18953b9aee29c375e439b8a6eeb05 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 4 Dec 2019 16:51:50 -0800 Subject: lib/rbtree: get successor's color directly After move parent assignment out, we can check the color directly. Link: http://lkml.kernel.org/r/20191028021442.5450-2-richardw.yang@linux.intel.com Signed-off-by: Wei Yang Acked-by: Peter Zijlstra (Intel) Reviewed-by: Michel Lespinasse Reviewed-by: Davidlohr Bueso Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 99c42e1a74b8..724b0d036b57 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -286,8 +286,7 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, rb_set_parent_color(child2, parent, RB_BLACK); rebalance = NULL; } else { - unsigned long pc2 = successor->__rb_parent_color; - rebalance = __rb_is_black(pc2) ? parent : NULL; + rebalance = rb_is_black(successor) ? parent : NULL; } successor->__rb_parent_color = pc; tmp = successor; -- cgit v1.2.3 From 964975ac6677c97ae61ec9d6969dd5d03f18d1c3 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 4 Dec 2019 16:52:03 -0800 Subject: lib/genalloc.c: rename addr_in_gen_pool to gen_pool_has_addr Follow the kernel conventions, rename addr_in_gen_pool to gen_pool_has_addr. [sjhuang@iluvatar.ai: fix Documentation/ too] Link: http://lkml.kernel.org/r/20181229015914.5573-1-sjhuang@iluvatar.ai Link: http://lkml.kernel.org/r/20181228083950.20398-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reviewed-by: Andrew Morton Cc: Russell King Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: Robin Murphy Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 4bd583bd6934..5b14a0f38124 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -206,7 +206,7 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, int nid, const char *name); extern struct gen_pool *gen_pool_get(struct device *dev, const char *name); -bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, +extern bool gen_pool_has_addr(struct gen_pool *pool, unsigned long start, size_t size); #ifdef CONFIG_OF -- cgit v1.2.3 From 6d13de1489b6bf539695f96d945de3860e6d5e17 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Dec 2019 16:52:40 -0800 Subject: uaccess: disallow > INT_MAX copy sizes As we've done with VFS, string operations, etc, reject usercopy sizes larger than INT_MAX, which would be nice to have for catching bugs related to size calculation overflows[1]. This adds 10 bytes to x86_64 defconfig text and 1980 bytes to the data section: text data bss dec hex filename 19691167 5134320 1646664 26472151 193eed7 vmlinux.before 19691177 5136300 1646664 26474141 193f69d vmlinux.after [1] https://marc.info/?l=linux-s390&m=156631939010493&w=2 Link: http://lkml.kernel.org/r/201908251612.F9902D7A@keescook Signed-off-by: Kees Cook Suggested-by: Dan Carpenter Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 659a4400517b..e93e249a4e9b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -147,6 +147,8 @@ check_copy_size(const void *addr, size_t bytes, bool is_source) __bad_copy_to(); return false; } + if (WARN_ON_ONCE(bytes > INT_MAX)) + return false; check_object_size(addr, bytes, is_source); return true; } -- cgit v1.2.3 From eec028c9386ed1a692aa01a85b55952202b41619 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 4 Dec 2019 16:52:43 -0800 Subject: kcov: remote coverage support Patch series " kcov: collect coverage from usb and vhost", v3. This patchset extends kcov to allow collecting coverage from backgound kernel threads. This extension requires custom annotations for each of the places where coverage collection is desired. This patchset implements this for hub events in the USB subsystem and for vhost workers. See the first patch description for details about the kcov extension. The other two patches apply this kcov extension to USB and vhost. Examples of other subsystems that might potentially benefit from this when custom annotations are added (the list is based on process_one_work() callers for bugs recently reported by syzbot): 1. fs: writeback wb_workfn() worker, 2. net: addrconf_dad_work()/addrconf_verify_work() workers, 3. net: neigh_periodic_work() worker, 4. net/p9: p9_write_work()/p9_read_work() workers, 5. block: blk_mq_run_work_fn() worker. These patches have been used to enable coverage-guided USB fuzzing with syzkaller for the last few years, see the details here: https://github.com/google/syzkaller/blob/master/docs/linux/external_fuzzing_usb.md This patchset has been pushed to the public Linux kernel Gerrit instance: https://linux-review.googlesource.com/c/linux/kernel/git/torvalds/linux/+/1524 This patch (of 3): Add background thread coverage collection ability to kcov. With KCOV_ENABLE coverage is collected only for syscalls that are issued from the current process. With KCOV_REMOTE_ENABLE it's possible to collect coverage for arbitrary parts of the kernel code, provided that those parts are annotated with kcov_remote_start()/kcov_remote_stop(). This allows to collect coverage from two types of kernel background threads: the global ones, that are spawned during kernel boot in a limited number of instances (e.g. one USB hub_event() worker thread is spawned per USB HCD); and the local ones, that are spawned when a user interacts with some kernel interface (e.g. vhost workers). To enable collecting coverage from a global background thread, a unique global handle must be assigned and passed to the corresponding kcov_remote_start() call. Then a userspace process can pass a list of such handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the kcov_remote_arg struct. This will attach the used kcov device to the code sections, that are referenced by those handles. Since there might be many local background threads spawned from different userspace processes, we can't use a single global handle per annotation. Instead, the userspace process passes a non-zero handle through the common_handle field of the kcov_remote_arg struct. This common handle gets saved to the kcov_handle field in the current task_struct and needs to be passed to the newly spawned threads via custom annotations. Those threads should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). Internally kcov stores handles as u64 integers. The top byte of a handle is used to denote the id of a subsystem that this handle belongs to, and the lower 4 bytes are used to denote the id of a thread instance within that subsystem. A reserved value 0 is used as a subsystem id for common handles as they don't belong to a particular subsystem. The bytes 4-7 are currently reserved and must be zero. In the future the number of bytes used for the subsystem or handle ids might be increased. When a particular userspace process collects coverage by via a common handle, kcov will collect coverage for each code section that is annotated to use the common handle obtained as kcov_handle from the current task_struct. However non common handles allow to collect coverage selectively from different subsystems. Link: http://lkml.kernel.org/r/e90e315426a384207edbec1d6aa89e43008e4caf.1572366574.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Arnd Bergmann Cc: Steven Rostedt Cc: David Windsor Cc: Elena Reshetova Cc: Anders Roxell Cc: Alexander Potapenko Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kcov.h | 23 +++++++++++++++++++++++ include/linux/sched.h | 8 ++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b76a1807028d..a10e84707d82 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -37,12 +37,35 @@ do { \ (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ } while (0) +/* See Documentation/dev-tools/kcov.rst for usage details. */ +void kcov_remote_start(u64 handle); +void kcov_remote_stop(void); +u64 kcov_common_handle(void); + +static inline void kcov_remote_start_common(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); +} + +static inline void kcov_remote_start_usb(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); +} + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} static inline void kcov_prepare_switch(struct task_struct *t) {} static inline void kcov_finish_switch(struct task_struct *t) {} +static inline void kcov_remote_start(u64 handle) {} +static inline void kcov_remote_stop(void) {} +static inline u64 kcov_common_handle(void) +{ + return 0; +} +static inline void kcov_remote_start_common(u64 id) {} +static inline void kcov_remote_start_usb(u64 id) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0cd97d9dd021..467d26046416 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,8 @@ struct task_struct { #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV + /* See kernel/kcov.c for more details. */ + /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; @@ -1221,6 +1223,12 @@ struct task_struct { /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; + + /* KCOV common handle for remote coverage collection: */ + u64 kcov_handle; + + /* KCOV sequence number: */ + int kcov_sequence; #endif #ifdef CONFIG_MEMCG -- cgit v1.2.3 From 30544ed5de431fe25d3793e4dd5a058d877c4d77 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2019 16:53:26 -0800 Subject: lib/bitmap: introduce bitmap_replace() helper In some drivers we want to have a single operation over bitmap which is an equivalent to: *dst = (*old & ~(*mask)) | (*new & *mask) Introduce bitmap_replace() helper for this. Link: http://lkml.kernel.org/r/20191022172922.61232-8-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Linus Walleij Cc: Rasmus Villemoes Cc: Bartosz Golaszewski Cc: Geert Uytterhoeven Cc: Marek Vasut Cc: Thomas Petazzoni Cc: William Breathitt Gray Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 9f046609e809..ff335b22f23c 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -53,6 +53,7 @@ * bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap @@ -140,6 +141,9 @@ extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +extern void __bitmap_replace(unsigned long *dst, + const unsigned long *old, const unsigned long *new, + const unsigned long *mask, unsigned int nbits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern int __bitmap_subset(const unsigned long *bitmap1, @@ -434,6 +438,18 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr __bitmap_shift_left(dst, src, shift, nbits); } +static inline void bitmap_replace(unsigned long *dst, + const unsigned long *old, + const unsigned long *new, + const unsigned long *mask, + unsigned int nbits) +{ + if (small_const_nbits(nbits)) + *dst = (*old & ~(*mask)) | (*new & *mask); + else + __bitmap_replace(dst, old, new, mask, nbits); +} + static inline int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *maskp, int nmaskbits) { -- cgit v1.2.3 From f949286c668aed5aa24acdb5838be9cfd9513bd3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 Dec 2019 16:54:32 -0800 Subject: mm: remove __ARCH_HAS_4LEVEL_HACK and include/asm-generic/4level-fixup.h There are no architectures that use include/asm-generic/4level-fixup.h therefore it can be removed along with __ARCH_HAS_4LEVEL_HACK define. Link: http://lkml.kernel.org/r/1572938135-31886-14-git-send-email-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Anatoly Pugachev Cc: Anton Ivanov Cc: Arnd Bergmann Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Jeff Dike Cc: "Kirill A. Shutemov" Cc: Mark Salter Cc: Matt Turner Cc: Michal Simek Cc: Peter Rosin Cc: Richard Weinberger Cc: Rolf Eike Beer Cc: Russell King Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b0ef04b6d15..c97ea3b694e6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1838,12 +1838,12 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} int __pte_alloc(struct mm_struct *mm, pmd_t *pmd); int __pte_alloc_kernel(pmd_t *pmd); +#if defined(CONFIG_MMU) + /* - * The following ifdef needed to get the 4level-fixup.h header to work. - * Remove it when 4level-fixup.h has been removed. + * The following ifdef needed to get the 5level-fixup.h header to work. + * Remove it when 5level-fixup.h has been removed. */ -#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) - #ifndef __ARCH_HAS_5LEVEL_HACK static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) @@ -1865,7 +1865,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? NULL: pmd_offset(pud, address); } -#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ +#endif /* CONFIG_MMU */ #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS -- cgit v1.2.3 From 61ff72f4016804b99d28988a57e65c217f01769d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 28 Nov 2019 08:47:51 +0800 Subject: printk: Drop pr_warning definition With all pr_warning are removed, saftely drop pr_warning definition. Link: http://lkml.kernel.org/r/20191128004752.35268-4-wangkefeng.wang@huawei.com To: joe@perches.com To: linux-kernel@vger.kernel.org Cc: gregkh@linuxfoundation.org Cc: tj@kernel.org Cc: arnd@arndb.de Cc: sergey.senozhatsky@gmail.com Cc: rostedt@goodmis.org Signed-off-by: Kefeng Wang Signed-off-by: Petr Mladek --- include/linux/printk.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index c09d67edda3a..1e6108b8d15f 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -302,9 +302,8 @@ extern int kptr_restrict; printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warning(fmt, ...) \ +#define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) -#define pr_warn pr_warning #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ -- cgit v1.2.3 From 04d26e7b159a396372646a480f4caa166d1b6720 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 6 Dec 2019 12:38:36 +0100 Subject: tcp: fix rejected syncookies due to stale timestamps If no synflood happens for a long enough period of time, then the synflood timestamp isn't refreshed and jiffies can advance so much that time_after32() can't accurately compare them any more. Therefore, we can end up in a situation where time_after32(now, last_overflow + HZ) returns false, just because these two values are too far apart. In that case, the synflood timestamp isn't updated as it should be, which can trick tcp_synq_no_recent_overflow() into rejecting valid syncookies. For example, let's consider the following scenario on a system with HZ=1000: * The synflood timestamp is 0, either because that's the timestamp of the last synflood or, more commonly, because we're working with a freshly created socket. * We receive a new SYN, which triggers synflood protection. Let's say that this happens when jiffies == 2147484649 (that is, 'synflood timestamp' + HZ + 2^31 + 1). * Then tcp_synq_overflow() doesn't update the synflood timestamp, because time_after32(2147484649, 1000) returns false. With: - 2147484649: the value of jiffies, aka. 'now'. - 1000: the value of 'last_overflow' + HZ. * A bit later, we receive the ACK completing the 3WHS. But cookie_v[46]_check() rejects it because tcp_synq_no_recent_overflow() says that we're not under synflood. That's because time_after32(2147484649, 120000) returns false. With: - 2147484649: the value of jiffies, aka. 'now'. - 120000: the value of 'last_overflow' + TCP_SYNCOOKIE_VALID. Of course, in reality jiffies would have increased a bit, but this condition will last for the next 119 seconds, which is far enough to accommodate for jiffie's growth. Fix this by updating the overflow timestamp whenever jiffies isn't within the [last_overflow, last_overflow + HZ] range. That shouldn't have any performance impact since the update still happens at most once per second. Now we're guaranteed to have fresh timestamps while under synflood, so tcp_synq_no_recent_overflow() can safely use it with time_after32() in such situations. Stale timestamps can still make tcp_synq_no_recent_overflow() return the wrong verdict when not under synflood. This will be handled in the next patch. For 64 bits architectures, the problem was introduced with the conversion of ->tw_ts_recent_stamp to 32 bits integer by commit cca9bab1b72c ("tcp: use monotonic timestamps for PAWS"). The problem has always been there on 32 bits architectures. Fixes: cca9bab1b72c ("tcp: use monotonic timestamps for PAWS") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guillaume Nault Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/time.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 0760a4f5a15c..8e10b9dbd8c2 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -97,4 +97,17 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its) */ #define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0) #define time_before32(b, a) time_after32(a, b) + +/** + * time_between32 - check if a 32-bit timestamp is within a given time range + * @t: the time which may be within [l,h] + * @l: the lower bound of the range + * @h: the higher bound of the range + * + * time_before32(t, l, h) returns true if @l <= @t <= @h. All operands are + * treated as 32-bit integers. + * + * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)). + */ +#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) #endif -- cgit v1.2.3 From 501a90c945103e8627406763dac418f20f3837b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Dec 2019 20:43:46 -0800 Subject: inet: protect against too small mtu values. syzbot was once again able to crash a host by setting a very small mtu on loopback device. Let's make inetdev_valid_mtu() available in include/net/ip.h, and use it in ip_setup_cork(), so that we protect both ip_append_page() and __ip_append_data() Also add a READ_ONCE() when the device mtu is read. Pairs this lockless read with one WRITE_ONCE() in __dev_set_mtu(), even if other code paths might write over this field. Add a big comment in include/linux/netdevice.h about dev->mtu needing READ_ONCE()/WRITE_ONCE() annotations. Hopefully we will add the missing ones in followup patches. [1] refcount_t: saturated; leaking memory. WARNING: CPU: 0 PID: 9464 at lib/refcount.c:22 refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 9464 Comm: syz-executor850 Not tainted 5.4.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 panic+0x2e3/0x75c kernel/panic.c:221 __warn.cold+0x2f/0x3e kernel/panic.c:582 report_bug+0x289/0x300 lib/bug.c:195 fixup_bug arch/x86/kernel/traps.c:174 [inline] fixup_bug arch/x86/kernel/traps.c:169 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286 invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027 RIP: 0010:refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22 Code: 06 31 ff 89 de e8 c8 f5 e6 fd 84 db 0f 85 6f ff ff ff e8 7b f4 e6 fd 48 c7 c7 e0 71 4f 88 c6 05 56 a6 a4 06 01 e8 c7 a8 b7 fd <0f> 0b e9 50 ff ff ff e8 5c f4 e6 fd 0f b6 1d 3d a6 a4 06 31 ff 89 RSP: 0018:ffff88809689f550 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff815e4336 RDI: ffffed1012d13e9c RBP: ffff88809689f560 R08: ffff88809c50a3c0 R09: fffffbfff15d31b1 R10: fffffbfff15d31b0 R11: ffffffff8ae98d87 R12: 0000000000000001 R13: 0000000000040100 R14: ffff888099041104 R15: ffff888218d96e40 refcount_add include/linux/refcount.h:193 [inline] skb_set_owner_w+0x2b6/0x410 net/core/sock.c:1999 sock_wmalloc+0xf1/0x120 net/core/sock.c:2096 ip_append_page+0x7ef/0x1190 net/ipv4/ip_output.c:1383 udp_sendpage+0x1c7/0x480 net/ipv4/udp.c:1276 inet_sendpage+0xdb/0x150 net/ipv4/af_inet.c:821 kernel_sendpage+0x92/0xf0 net/socket.c:3794 sock_sendpage+0x8b/0xc0 net/socket.c:936 pipe_to_sendpage+0x2da/0x3c0 fs/splice.c:458 splice_from_pipe_feed fs/splice.c:512 [inline] __splice_from_pipe+0x3ee/0x7c0 fs/splice.c:636 splice_from_pipe+0x108/0x170 fs/splice.c:671 generic_splice_sendpage+0x3c/0x50 fs/splice.c:842 do_splice_from fs/splice.c:861 [inline] direct_splice_actor+0x123/0x190 fs/splice.c:1035 splice_direct_to_actor+0x3b4/0xa30 fs/splice.c:990 do_splice_direct+0x1da/0x2a0 fs/splice.c:1078 do_sendfile+0x597/0xd00 fs/read_write.c:1464 __do_sys_sendfile64 fs/read_write.c:1525 [inline] __se_sys_sendfile64 fs/read_write.c:1511 [inline] __x64_sys_sendfile64+0x1dd/0x220 fs/read_write.c:1511 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x441409 Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffb64c4f78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441409 RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000005 RBP: 0000000000073b8a R08: 0000000000000010 R09: 0000000000000010 R10: 0000000000010001 R11: 0000000000000246 R12: 0000000000402180 R13: 0000000000402210 R14: 0000000000000000 R15: 0000000000000000 Kernel Offset: disabled Rebooting in 86400 seconds.. Fixes: 1470ddf7f8ce ("inet: Remove explicit write references to sk/inet in ip_append_data") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf0923579af4..9ef20389622d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1881,6 +1881,11 @@ struct net_device { unsigned char if_port; unsigned char dma; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ unsigned int mtu; unsigned int min_mtu; unsigned int max_mtu; -- cgit v1.2.3 From a28c8b9db8a1014aa572cd19a3bdb9ddebd3e555 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 7 Dec 2019 13:21:01 -0800 Subject: pipe: remove 'waiting_writers' merging logic This code is ancient, and goes back to when we only had a single page for the pipe buffers. The exact history is hidden in the mists of time (ie "before git", and in fact predates the BK repository too). At that long-ago point in time, it actually helped to try to merge big back-and-forth pipe reads and writes, and not limit pipe reads to the single pipe buffer in length just because that was all we had at a time. However, since then we've expanded the pipe buffers to multiple pages, and this logic really doesn't seem to make sense. And a lot of it is somewhat questionable (ie "hmm, the user asked for a non-blocking read, but we see that there's a writer pending, so let's wait anyway to get the extra data that the writer will have"). But more importantly, it makes the "go to sleep" logic much less obvious, and considering the wakeup issues we've had, I want to make for less of those kinds of things. Cc: David Howells Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 44f2245debda..dbcfa6892384 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -38,7 +38,6 @@ struct pipe_buffer { * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe * @files: number of struct file referring this pipe (protected by ->i_lock) - * @waiting_writers: number of writers blocked waiting for room * @r_counter: reader counter * @w_counter: writer counter * @fasync_readers: reader side fasync @@ -56,7 +55,6 @@ struct pipe_inode_info { unsigned int readers; unsigned int writers; unsigned int files; - unsigned int waiting_writers; unsigned int r_counter; unsigned int w_counter; struct page *tmp_page; -- cgit v1.2.3 From 9fa76ca7b8bdcdf51fc8c7b7b7a7bfc4eccceb58 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 6 Dec 2019 16:55:41 +0000 Subject: efi: Fix efi_loaded_image_t::unload type The ::unload field is a function pointer, so it should be u32 for 32-bit, u64 for 64-bit. Add a prototype for it in the native efi_loaded_image_t type. Also change type of parent_handle and device_handle from void * to efi_handle_t for documentation purposes. The unload method is not used, so no functional change. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Shevchenko Cc: Bhupesh Sharma Cc: Linus Torvalds Cc: Masayoshi Mizuma Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191206165542.31469-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 99dfea595c8c..aa54586db7a5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -824,7 +824,7 @@ typedef struct { __aligned_u64 image_size; unsigned int image_code_type; unsigned int image_data_type; - unsigned long unload; + u32 unload; } efi_loaded_image_32_t; typedef struct { @@ -840,14 +840,14 @@ typedef struct { __aligned_u64 image_size; unsigned int image_code_type; unsigned int image_data_type; - unsigned long unload; + u64 unload; } efi_loaded_image_64_t; typedef struct { u32 revision; - void *parent_handle; + efi_handle_t parent_handle; efi_system_table_t *system_table; - void *device_handle; + efi_handle_t device_handle; void *file_path; void *reserved; u32 load_options_size; @@ -856,7 +856,7 @@ typedef struct { __aligned_u64 image_size; unsigned int image_code_type; unsigned int image_data_type; - unsigned long unload; + efi_status_t (*unload)(efi_handle_t image_handle); } efi_loaded_image_t; -- cgit v1.2.3 From 05d7ae15cfb18f9ce55eef85bb6bcd62d31acc57 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 5 Dec 2019 12:05:06 +0200 Subject: PM / devfreq: Add PM QoS support Register notifiers with the PM QoS framework in order to respond to requests for DEV_PM_QOS_MIN_FREQUENCY and DEV_PM_QOS_MAX_FREQUENCY. No notifiers are added by this patch but PM QoS constraints can be imposed externally (for example from other devices). Signed-off-by: Leonard Crestez Acked-by: Chanwoo Choi Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 2bae9ed3c783..8b92ccbd1962 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -136,6 +136,8 @@ struct devfreq_dev_profile { * @time_in_state: Statistics of devfreq states * @last_stat_updated: The last time stat updated * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier + * @nb_min: Notifier block for DEV_PM_QOS_MIN_FREQUENCY + * @nb_max: Notifier block for DEV_PM_QOS_MAX_FREQUENCY * * This structure stores the devfreq information for a give device. * @@ -178,6 +180,9 @@ struct devfreq { unsigned long last_stat_updated; struct srcu_notifier_head transition_notifier_list; + + struct notifier_block nb_min; + struct notifier_block nb_max; }; struct devfreq_freqs { -- cgit v1.2.3 From 27dbc542f651ed09de910f274b32634904103774 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 5 Dec 2019 12:05:07 +0200 Subject: PM / devfreq: Use PM QoS for sysfs min/max_freq Switch the handling of min_freq and max_freq from sysfs to use the dev_pm_qos_request interface. Since PM QoS handles frequencies as kHz this change reduces the precision of min_freq and max_freq. This shouldn't introduce problems because frequencies which are not an integer number of kHz are likely not an integer number of Hz either. Try to ensure compatibility by rounding min values down and rounding max values up. Signed-off-by: Leonard Crestez Acked-by: Chanwoo Choi Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke [cw00.choi: Return -EAGAIN instead of -EINVAL if dev_pm_qos is inactive] Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 8b92ccbd1962..fb376b5b7281 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -13,6 +13,7 @@ #include #include #include +#include #define DEVFREQ_NAME_LEN 16 @@ -123,8 +124,8 @@ struct devfreq_dev_profile { * @previous_freq: previously configured frequency value. * @data: Private data of the governor. The devfreq framework does not * touch this. - * @min_freq: Limit minimum frequency requested by user (0: none) - * @max_freq: Limit maximum frequency requested by user (0: none) + * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs) + * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs) * @scaling_min_freq: Limit minimum frequency requested by OPP interface * @scaling_max_freq: Limit maximum frequency requested by OPP interface * @stop_polling: devfreq polling status of a device. @@ -163,8 +164,8 @@ struct devfreq { void *data; /* private data for governors */ - unsigned long min_freq; - unsigned long max_freq; + struct dev_pm_qos_request user_min_freq_req; + struct dev_pm_qos_request user_max_freq_req; unsigned long scaling_min_freq; unsigned long scaling_max_freq; bool stop_polling; -- cgit v1.2.3 From c593642c8be046915ca3a4a300243a68077cd207 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 9 Dec 2019 10:31:43 -0800 Subject: treewide: Use sizeof_field() macro Replace all the occurrences of FIELD_SIZEOF() with sizeof_field() except at places where these are defined. Later patches will remove the unused definition of FIELD_SIZEOF(). This patch is generated using following script: EXCLUDE_FILES="include/linux/stddef.h|include/linux/kernel.h" git grep -l -e "\bFIELD_SIZEOF\b" | while read file; do if [[ "$file" =~ $EXCLUDE_FILES ]]; then continue fi sed -i -e 's/\bFIELD_SIZEOF\b/sizeof_field/g' $file; done Signed-off-by: Pankaj Bharadiya Link: https://lore.kernel.org/r/20190924105839.110713-3-pankaj.laxminarayan.bharadiya@intel.com Co-developed-by: Kees Cook Signed-off-by: Kees Cook Acked-by: David Miller # for net --- include/linux/filter.h | 12 ++++++------ include/linux/kvm_host.h | 2 +- include/linux/phy_led_triggers.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index a141cb07e76a..345f3748e0fb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -420,7 +420,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) #define BPF_FIELD_SIZEOF(type, field) \ ({ \ - const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + const int __size = bytes_to_bpf_size(sizeof_field(type, field)); \ BUILD_BUG_ON(__size < 0); \ __size; \ }) @@ -497,7 +497,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \ ({ \ - BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \ + BUILD_BUG_ON(sizeof_field(TYPE, MEMBER) != (SIZE)); \ *(PTR_SIZE) = (SIZE); \ offsetof(TYPE, MEMBER); \ }) @@ -608,7 +608,7 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; - BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); cb->data_meta = skb->data - skb_metadata_len(skb); cb->data_end = skb->data + skb_headlen(skb); } @@ -646,9 +646,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb) * attached to sockets, we need to clear the bpf_skb_cb() area * to not leak previous contents to user space. */ - BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN); - BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != - FIELD_SIZEOF(struct qdisc_skb_cb, data)); + BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != BPF_SKB_CB_LEN); + BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != + sizeof_field(struct qdisc_skb_cb, data)); return qdisc_skb_cb(skb)->data; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ed1e2f8641e..538c25e778c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -149,7 +149,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQUEST_ARCH_BASE 8 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ - BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ + BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ }) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h index 3d507a8a6989..5c4d7a755101 100644 --- a/include/linux/phy_led_triggers.h +++ b/include/linux/phy_led_triggers.h @@ -14,7 +14,7 @@ struct phy_device; #define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 11 #define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \ - FIELD_SIZEOF(struct mdio_device, addr)+\ + sizeof_field(struct mdio_device, addr)+\ PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) struct phy_led_trigger { -- cgit v1.2.3 From 93c60483b5feefced92b869d5f97769495bc6313 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 9 Dec 2019 13:55:15 -0800 Subject: bus: ti-sysc: Fix missing force mstandby quirk handling Commit 03856e928b0e ("bus: ti-sysc: Handle mstandby quirk and use it for musb") added quirk handling for mstandby quirk but did not consider that we also need a quirk variant for SYSC_QUIRK_FORCE_MSTANDBY. We need to use forced idle mode for both SYSC_QUIRK_SWSUP_MSTANDBY and SYSC_QUIRK_FORCE_MSTANDBY, but SYSC_QUIRK_SWSUP_MSTANDBY also need to additionally also configure no-idle mode when enabled. Fixes: 03856e928b0e ("bus: ti-sysc: Handle mstandby quirk and use it for musb") Signed-off-by: Tony Lindgren --- include/linux/platform_data/ti-sysc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 0b9380475144..8cfe570fdece 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -49,6 +49,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_FORCE_MSTANDBY BIT(20) #define SYSC_MODULE_QUIRK_AESS BIT(19) #define SYSC_MODULE_QUIRK_SGX BIT(18) #define SYSC_MODULE_QUIRK_HDQ1W BIT(17) -- cgit v1.2.3 From ff205766dbbee024a4a716638868d98ffb17748a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 8 Dec 2019 16:01:12 -0800 Subject: ftrace: Fix function_graph tracer interaction with BPF trampoline Depending on type of BPF programs served by BPF trampoline it can call original function. In such case the trampoline will skip one stack frame while returning. That will confuse function_graph tracer and will cause crashes with bad RIP. Teach graph tracer to skip functions that have BPF trampoline attached. Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 232806d5689d..987c2dc55bde 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -264,6 +264,7 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, struct dyn_ftrace *rec, unsigned long old_addr, unsigned long new_addr); +unsigned long ftrace_find_rec_direct(unsigned long ip); #else # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) @@ -290,6 +291,10 @@ static inline int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, { return -ENODEV; } +static inline unsigned long ftrace_find_rec_direct(unsigned long ip) +{ + return 0; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -- cgit v1.2.3 From 2c2f00ab1641895183488ff2bce53c415344fb87 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 6 Dec 2019 01:23:22 +0100 Subject: i2c: remove i2c_new_dummy() API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All in-kernel users have been converted to {devm_}i2c_new_dummy_device(). Remove the old API. Signed-off-by: Wolfram Sang Reviewed-by: Niklas Söderlund Tested-by: Luca Ceresoli Reviewed-by: Luca Ceresoli Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index d2f786706657..d1baf8d57536 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -466,12 +466,6 @@ i2c_new_probed_device(struct i2c_adapter *adap, /* Common custom probe functions */ extern int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr); -/* For devices that use several addresses, use i2c_new_dummy() to make - * client handles for the extra addresses. - */ -extern struct i2c_client * -i2c_new_dummy(struct i2c_adapter *adap, u16 address); - extern struct i2c_client * i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address); -- cgit v1.2.3 From 3ac61258599b5abe45d97b926d44a79262385bae Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Dec 2019 20:35:38 -0800 Subject: i2c: fix header file kernel-doc warning Fix kernel-doc warning in . ../include/linux/i2c.h:337: warning: Function parameter or member 'init_irq' not described in 'i2c_client' Signed-off-by: Randy Dunlap Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index d1baf8d57536..91954324f985 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -300,6 +300,7 @@ struct i2c_driver { * generic enough to hide second-sourcing and compatible revisions. * @adapter: manages the bus segment hosting this I2C device * @dev: Driver model device node for the slave. + * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) * @detected: member of an i2c_driver.clients list or i2c-core's * userspace_devices list -- cgit v1.2.3 From 8c9312a925ad859daefd0f443ef3b6dc7157d881 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 6 Nov 2019 22:21:01 +0100 Subject: i2c: add helper to check if a client has a driver attached As a preparation for an API conversion, factor out something frequently used in the media subsystem. As an improvement, it bails out on both, NULL and ERRPTR to handle the old and new API. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 91954324f985..582ef05ec07e 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -851,6 +851,11 @@ extern void i2c_del_driver(struct i2c_driver *driver); #define i2c_add_driver(driver) \ i2c_register_driver(THIS_MODULE, driver) +static inline bool i2c_client_has_driver(struct i2c_client *client) +{ + return !IS_ERR_OR_NULL(client) && client->dev.driver; +} + /* call the i2c_client->command() of all attached clients with * the given arguments */ extern void i2c_clients_command(struct i2c_adapter *adap, -- cgit v1.2.3 From b91e014f078e2e4f24778680e28dbbdecc7f0eb9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 8 Dec 2019 16:01:13 -0800 Subject: bpf: Make BPF trampoline use register_ftrace_direct() API Make BPF trampoline attach its generated assembly code to kernel functions via register_ftrace_direct() API. It helps ftrace-based tracers co-exist with BPF trampoline on the same kernel function. It also switches attaching logic from arch specific text_poke to generic ftrace that is available on many architectures. text_poke is still necessary for bpf-to-bpf attach and for bpf_tail_call optimization. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191209000114.1876138-3-ast@kernel.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 35903f148be5..ac7de5291509 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -461,6 +461,7 @@ struct bpf_trampoline { struct { struct btf_func_model model; void *addr; + bool ftrace_managed; } func; /* list of BPF programs using this trampoline */ struct hlist_head progs_hlist[BPF_TRAMP_MAX]; -- cgit v1.2.3 From 5e787dbf659fe77d56215be74044f85e01b3920f Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 22:10:35 +0200 Subject: devtmpfs: use do_mount() instead of ksys_mount() In devtmpfs, do_mount() can be called directly instead of complex wrapping by ksys_mount(): - the first and third arguments are const strings in the kernel, and do not need to be copied over from userspace; - the fifth argument is NULL, and therefore no page needs to be copied over from userspace; - the second and fourth argument are passed through anyway. Signed-off-by: Dominik Brodowski --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index e226030c1df3..96ff76731e93 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1666,11 +1666,11 @@ extern bool kill_device(struct device *dev); #ifdef CONFIG_DEVTMPFS extern int devtmpfs_create_node(struct device *dev); extern int devtmpfs_delete_node(struct device *dev); -extern int devtmpfs_mount(const char *mntdir); +extern int devtmpfs_mount(void); #else static inline int devtmpfs_create_node(struct device *dev) { return 0; } static inline int devtmpfs_delete_node(struct device *dev) { return 0; } -static inline int devtmpfs_mount(const char *mountpoint) { return 0; } +static inline int devtmpfs_mount(void) { return 0; } #endif /* drivers/base/power/shutdown.c */ -- cgit v1.2.3 From cccaa5e33525fc07f4a2ce0518e50b9ddf435e47 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 22:41:09 +0200 Subject: init: use do_mount() instead of ksys_mount() In prepare_namespace(), do_mount() can be used instead of ksys_mount() as the first and third argument are const strings in the kernel, the second and fourth argument are passed through anyway, and the fifth argument is NULL. In do_mount_root(), ksys_mount() is called with the first and third argument being already kernelspace strings, which do not need to be copied over from userspace to kernelspace (again). The second and fourth arguments are passed through to do_mount() anyway. The fifth argument, while already residing in kernelspace, needs to be put into a page of its own. Then, do_mount() can be used instead of ksys_mount(). Once this is done, there are no in-kernel users to ksys_mount() left, which can therefore be removed. Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d0391cc2dae9..5262b7a76d39 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1231,8 +1231,6 @@ asmlinkage long sys_ni_syscall(void); * the ksys_xyzyyz() functions prototyped below. */ -int ksys_mount(const char __user *dev_name, const char __user *dir_name, - const char __user *type, unsigned long flags, void __user *data); int ksys_umount(char __user *name, int flags); int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); -- cgit v1.2.3 From 5addeae1bedc4c126b179f61e43e039bb373581f Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 12 Dec 2019 16:52:00 +0100 Subject: blk-cgroup: remove blkcg_drain_queue Since blk_drain_queue had already been removed, so this function is not needed anymore. Signed-off-by: Guoqing Jiang Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 19394c77ed99..e4a6949fd171 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -188,7 +188,6 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); -void blkcg_drain_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); /* Blkio controller policy registration */ @@ -720,7 +719,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { ret static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q) { return NULL; } static inline int blkcg_init_queue(struct request_queue *q) { return 0; } -static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } -- cgit v1.2.3 From 85572c2c4a45a541e880e087b5b17a48198b2416 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Dec 2019 11:28:41 +0100 Subject: cpufreq: Avoid leaving stale IRQ work items during CPU offline The scheduler code calling cpufreq_update_util() may run during CPU offline on the target CPU after the IRQ work lists have been flushed for it, so the target CPU should be prevented from running code that may queue up an IRQ work item on it at that point. Unfortunately, that may not be the case if dvfs_possible_from_any_cpu is set for at least one cpufreq policy in the system, because that allows the CPU going offline to run the utilization update callback of the cpufreq governor on behalf of another (online) CPU in some cases. If that happens, the cpufreq governor callback may queue up an IRQ work on the CPU running it, which is going offline, and the IRQ work may not be flushed after that point. Moreover, that IRQ work cannot be flushed until the "offlining" CPU goes back online, so if any other CPU calls irq_work_sync() to wait for the completion of that IRQ work, it will have to wait until the "offlining" CPU is back online and that may not happen forever. In particular, a system-wide deadlock may occur during CPU online as a result of that. The failing scenario is as follows. CPU0 is the boot CPU, so it creates a cpufreq policy and becomes the "leader" of it (policy->cpu). It cannot go offline, because it is the boot CPU. Next, other CPUs join the cpufreq policy as they go online and they leave it when they go offline. The last CPU to go offline, say CPU3, may queue up an IRQ work while running the governor callback on behalf of CPU0 after leaving the cpufreq policy because of the dvfs_possible_from_any_cpu effect described above. Then, CPU0 is the only online CPU in the system and the stale IRQ work is still queued on CPU3. When, say, CPU1 goes back online, it will run irq_work_sync() to wait for that IRQ work to complete and so it will wait for CPU3 to go back online (which may never happen even in principle), but (worse yet) CPU0 is waiting for CPU1 at that point too and a system-wide deadlock occurs. To address this problem notice that CPUs which cannot run cpufreq utilization update code for themselves (for example, because they have left the cpufreq policies that they belonged to), should also be prevented from running that code on behalf of the other CPUs that belong to a cpufreq policy with dvfs_possible_from_any_cpu set and so in that case the cpufreq_update_util_data pointer of the CPU running the code must not be NULL as well as for the CPU which is the target of the cpufreq utilization update in progress. Accordingly, change cpufreq_this_cpu_can_update() into a regular function in kernel/sched/cpufreq.c (instead of a static inline in a header file) and make it check the cpufreq_update_util_data pointer of the local CPU if dvfs_possible_from_any_cpu is set for the target cpufreq policy. Also update the schedutil governor to do the cpufreq_this_cpu_can_update() check in the non-fast-switch case too to avoid the stale IRQ work issues. Fixes: 99d14d0e16fa ("cpufreq: Process remote callbacks from any CPU if the platform permits") Link: https://lore.kernel.org/linux-pm/20191121093557.bycvdo4xyinbc5cb@vireshk-i7/ Reported-by: Anson Huang Tested-by: Anson Huang Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Tested-by: Peng Fan (i.MX8QXP-MEK) Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 11 ----------- include/linux/sched/cpufreq.h | 3 +++ 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 92d5fdc8154e..31b1b0e03df8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -595,17 +595,6 @@ struct governor_attr { size_t count); }; -static inline bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy) -{ - /* - * Allow remote callbacks if: - * - dvfs_possible_from_any_cpu flag is set - * - the local and remote CPUs share cpufreq policy - */ - return policy->dvfs_possible_from_any_cpu || - cpumask_test_cpu(smp_processor_id(), policy->cpus); -} - /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index afa940cd50dc..cc6bcc1e96bc 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -12,6 +12,8 @@ #define SCHED_CPUFREQ_MIGRATION (1U << 1) #ifdef CONFIG_CPU_FREQ +struct cpufreq_policy; + struct update_util_data { void (*func)(struct update_util_data *data, u64 time, unsigned int flags); }; @@ -20,6 +22,7 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, void (*func)(struct update_util_data *data, u64 time, unsigned int flags)); void cpufreq_remove_update_util_hook(int cpu); +bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy); static inline unsigned long map_util_freq(unsigned long util, unsigned long freq, unsigned long cap) -- cgit v1.2.3 From b49a733d684e0096340b93e9dfd471f0e3ddc06d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 16:00:10 +0200 Subject: init: unify opening /dev/console as stdin/stdout/stderr Merge the two instances where /dev/console is opened as stdin/stdout/stderr. Signed-off-by: Dominik Brodowski --- include/linux/initrd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/initrd.h b/include/linux/initrd.h index d77fe34fb00a..aa5914355728 100644 --- a/include/linux/initrd.h +++ b/include/linux/initrd.h @@ -28,3 +28,5 @@ extern unsigned int real_root_dev; extern char __initramfs_start[]; extern unsigned long __initramfs_size; + +void console_on_rootfs(void); -- cgit v1.2.3 From 8243186f0cc7c57cf9d6a110cd7315c44e3e0be8 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 23 Oct 2018 16:24:09 +0200 Subject: fs: remove ksys_dup() ksys_dup() is used only at one place in the kernel, namely to duplicate fd 0 of /dev/console to stdout and stderr. The same functionality can be achieved by using functions already available within the kernel namespace. Signed-off-by: Dominik Brodowski --- include/linux/syscalls.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5262b7a76d39..2960dedcfde8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1232,7 +1232,6 @@ asmlinkage long sys_ni_syscall(void); */ int ksys_umount(char __user *name, int flags); -int ksys_dup(unsigned int fildes); int ksys_chroot(const char __user *filename); ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count); int ksys_chdir(const char __user *filename); -- cgit v1.2.3 From 8dbd76e79a16b45b2ccb01d2f2e08dbf64e71e40 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Dec 2019 18:20:41 -0800 Subject: tcp/dccp: fix possible race __inet_lookup_established() Michal Kubecek and Firo Yang did a very nice analysis of crashes happening in __inet_lookup_established(). Since a TCP socket can go from TCP_ESTABLISH to TCP_LISTEN (via a close()/socket()/listen() cycle) without a RCU grace period, I should not have changed listeners linkage in their hash table. They must use the nulls protocol (Documentation/RCU/rculist_nulls.txt), so that a lookup can detect a socket in a hash list was moved in another one. Since we added code in commit d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix"), we have to add hlist_nulls_add_tail_rcu() helper. Fixes: 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood") Signed-off-by: Eric Dumazet Reported-by: Michal Kubecek Reported-by: Firo Yang Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/netdev/20191120083919.GH27852@unicorn.suse.cz/ Signed-off-by: Jakub Kicinski --- include/linux/rculist_nulls.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index bc8206a8f30e..61974c4c566b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -100,6 +100,43 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_nulls, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + /* Note: write side code, so rcu accessors are not needed. */ + for (i = h->first; !is_a_nulls(i); i = i->next) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. -- cgit v1.2.3 From d82d47d5d8a6d40bb622cef31b6c6fa09cbd01ea Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Dec 2019 20:31:32 -0800 Subject: jbd2: fix kernel-doc notation warning Fix kernel-doc warning by inserting a beginning '*' character for the kernel-doc line. ../include/linux/jbd2.h:461: warning: bad line: journal. These are dirty buffers and revoke descriptor blocks. Link: https://lore.kernel.org/r/53e3ce27-ceae-560d-0fd4-f95728a33e12@infradead.org Cc: stable@kernel.org Signed-off-by: Randy Dunlap Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 842b62606025..89bf48a81798 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -457,7 +457,7 @@ struct jbd2_revoke_table_s; * @h_journal: Which journal handle belongs to - used iff h_reserved set. * @h_rsv_handle: Handle reserved for finishing the logical operation. * @h_total_credits: Number of remaining buffers we are allowed to add to - journal. These are dirty buffers and revoke descriptor blocks. + * journal. These are dirty buffers and revoke descriptor blocks. * @h_revoke_credits: Number of remaining revoke records available for handle * @h_ref: Reference count on this handle. * @h_err: Field for caller's use to track errors through large fs operations. -- cgit v1.2.3 From a2ea07465c8d7984cc6b8b1f0b3324f9b138094a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 16 Dec 2019 17:49:00 +0100 Subject: bpf: Fix missing prog untrack in release_maps Commit da765a2f5993 ("bpf: Add poke dependency tracking for prog array maps") wrongly assumed that in case of prog load errors, we're cleaning up all program tracking via bpf_free_used_maps(). However, it can happen that we're still at the point where we didn't copy map pointers into the prog's aux section such that env->prog->aux->used_maps is still zero, running into a UAF. In such case, the verifier has similar release_maps() helper that drops references to used maps from its env. Consolidate the release code into __bpf_free_used_maps() and call it from all sides to fix it. Fixes: da765a2f5993 ("bpf: Add poke dependency tracking for prog array maps") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/1c2909484ca524ae9f55109b06f22b6213e76376.1576514756.git.daniel@iogearbox.net --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ac7de5291509..085a59afba85 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -818,6 +818,8 @@ struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); int __bpf_prog_charge(struct user_struct *user, u32 pages); void __bpf_prog_uncharge(struct user_struct *user, u32 pages); +void __bpf_free_used_maps(struct bpf_prog_aux *aux, + struct bpf_map **used_maps, u32 len); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); -- cgit v1.2.3 From 45477b3fe3d10376b649b1b85fce72b2f9f1da84 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 12 Dec 2019 12:58:35 -0500 Subject: security: keys: trusted: fix lost handle flush The original code, before it was moved into security/keys/trusted-keys had a flush after the blob unseal. Without that flush, the volatile handles increase in the TPM until it becomes unusable and the system either has to be rebooted or the TPM volatile area manually flushed. Fix by adding back the lost flush, which we now have to export because of the relocation of the trusted key code may cause the consumer to be modular. Signed-off-by: James Bottomley Fixes: 2e19e10131a0 ("KEYS: trusted: Move TPM2 trusted keys code") Reviewed-by: Jerry Snitselaar Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 0d6e949ba315..03e9b184411b 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -403,6 +403,7 @@ extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen); extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max); extern struct tpm_chip *tpm_default_chip(void); +void tpm2_flush_context(struct tpm_chip *chip, u32 handle); #else static inline int tpm_is_tpm2(struct tpm_chip *chip) { -- cgit v1.2.3 From e47304232b373362228bf233f17bd12b11c9aafc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 17 Dec 2019 13:28:16 +0100 Subject: bpf: Fix cgroup local storage prog tracking Recently noticed that we're tracking programs related to local storage maps through their prog pointer. This is a wrong assumption since the prog pointer can still change throughout the verification process, for example, whenever bpf_patch_insn_single() is called. Therefore, the prog pointer that was assigned via bpf_cgroup_storage_assign() is not guaranteed to be the same as we pass in bpf_cgroup_storage_release() and the map would therefore remain in busy state forever. Fix this by using the prog's aux pointer which is stable throughout verification and beyond. Fixes: de9cbbaadba5 ("bpf: introduce cgroup storage maps") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Roman Gushchin Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/1471c69eca3022218666f909bc927a92388fd09e.1576580332.git.daniel@iogearbox.net --- include/linux/bpf-cgroup.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 169fd25f6bc2..9be71c195d74 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -157,8 +157,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage, struct cgroup *cgroup, enum bpf_attach_type type); void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage); -int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map); -void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map); +int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map); +void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map); int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, @@ -360,9 +360,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, static inline void bpf_cgroup_storage_set( struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} -static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog, +static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } -static inline void bpf_cgroup_storage_release(struct bpf_prog *prog, +static inline void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map) {} static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; } -- cgit v1.2.3 From d98c9e83b5e7ca78175df1b13ac4a6d460d3962d Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 17 Dec 2019 20:51:38 -0800 Subject: kasan: fix crashes on access to memory mapped by vm_map_ram() With CONFIG_KASAN_VMALLOC=y any use of memory obtained via vm_map_ram() will crash because there is no shadow backing that memory. Instead of sprinkling additional kasan_populate_vmalloc() calls all over the vmalloc code, move it into alloc_vmap_area(). This will fix vm_map_ram() and simplify the code a bit. [aryabinin@virtuozzo.com: v2] Link: http://lkml.kernel.org/r/20191205095942.1761-1-aryabinin@virtuozzo.comLink: http://lkml.kernel.org/r/20191204204534.32202-1-aryabinin@virtuozzo.com Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") Signed-off-by: Andrey Ryabinin Reported-by: Dmitry Vyukov Reviewed-by: Uladzislau Rezki (Sony) Cc: Daniel Axtens Cc: Alexander Potapenko Cc: Daniel Axtens Cc: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 4f404c565db1..e18fe54969e9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -205,20 +205,23 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS */ #ifdef CONFIG_KASAN_VMALLOC -int kasan_populate_vmalloc(unsigned long requested_size, - struct vm_struct *area); -void kasan_poison_vmalloc(void *start, unsigned long size); +int kasan_populate_vmalloc(unsigned long addr, unsigned long size); +void kasan_poison_vmalloc(const void *start, unsigned long size); +void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); #else -static inline int kasan_populate_vmalloc(unsigned long requested_size, - struct vm_struct *area) +static inline int kasan_populate_vmalloc(unsigned long start, + unsigned long size) { return 0; } -static inline void kasan_poison_vmalloc(void *start, unsigned long size) {} +static inline void kasan_poison_vmalloc(const void *start, unsigned long size) +{ } +static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) +{ } static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, -- cgit v1.2.3 From be1db4753ee6a0db80a900df9dbbf6ad2acc4bd1 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 17 Dec 2019 20:51:41 -0800 Subject: mm/memory.c: add apply_to_existing_page_range() helper apply_to_page_range() takes an address range, and if any parts of it are not covered by the existing page table hierarchy, it allocates memory to fill them in. In some use cases, this is not what we want - we want to be able to operate exclusively on PTEs that are already in the tables. Add apply_to_existing_page_range() for this. Adjust the walker functions for apply_to_page_range to take 'create', which switches them between the old and new modes. This will be used in KASAN vmalloc. [akpm@linux-foundation.org: reduce code duplication] [akpm@linux-foundation.org: s/apply_to_existing_pages/apply_to_existing_page_range/] [akpm@linux-foundation.org: initialize __apply_to_page_range::err] Link: http://lkml.kernel.org/r/20191205140407.1874-1-dja@axtens.net Signed-off-by: Daniel Axtens Cc: Dmitry Vyukov Cc: Uladzislau Rezki (Sony) Cc: Alexander Potapenko Cc: Daniel Axtens Cc: Qian Cai Cc: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c97ea3b694e6..80a9162b406c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2621,6 +2621,9 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern int apply_to_existing_page_range(struct mm_struct *mm, + unsigned long address, unsigned long size, + pte_fn_t fn, void *data); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); -- cgit v1.2.3 From 1f26c0d3d24125992ab0026b0dab16c08df947c7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Dec 2019 18:52:45 -0800 Subject: net: fix kernel-doc warning in Fix missing '*' kernel-doc notation that causes this warning: ../include/linux/netdevice.h:1779: warning: bad line: spinlock Fixes: ab92d68fc22f ("net: core: add generic lockdep keys") Signed-off-by: Randy Dunlap Cc: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9ef20389622d..ae5e260911e2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1775,7 +1775,7 @@ enum netdev_priv_flags { * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. * @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock - spinlock + * spinlock * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount * @qdisc_xmit_lock_key: lockdep class annotating * netdev_queue->_xmit_lock spinlock -- cgit v1.2.3 From 0aa4d016c043d16a282e7e93edf6213a7b954c90 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 17 Dec 2019 18:07:41 +0100 Subject: of: mdio: export of_mdiobus_child_is_phy This patch exports of_mdiobus_child_is_phy, allowing to check if a child node is a network PHY. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 99cefe6f5edb..79bc82e30c02 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -12,6 +12,7 @@ #include #if IS_ENABLED(CONFIG_OF_MDIO) +extern bool of_mdiobus_child_is_phy(struct device_node *child); extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); extern struct phy_device *of_phy_find_device(struct device_node *phy_np); extern struct phy_device *of_phy_connect(struct net_device *dev, @@ -54,6 +55,11 @@ static inline int of_mdio_parse_addr(struct device *dev, } #else /* CONFIG_OF_MDIO */ +static bool of_mdiobus_child_is_phy(struct device_node *child) +{ + return false; +} + static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { /* -- cgit v1.2.3 From d2ed49cf6c13e379c5819aa5ac20e1f9674ebc89 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Dec 2019 23:24:47 +0000 Subject: mod_devicetable: fix PHY module format When a PHY is probed, if the top bit is set, we end up requesting a module with the string "mdio:-10101110000000100101000101010001" - the top bit is printed to a signed -1 value. This leads to the module not being loaded. Fix the module format string and the macro generating the values for it to ensure that we only print unsigned types and the top bit is always 0/1. We correctly end up with "mdio:10101110000000100101000101010001". Fixes: 8626d3b43280 ("phylib: Support phy module autoloading") Reviewed-by: Andrew Lunn Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mod_devicetable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 5714fd35a83c..e3596db077dc 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -587,9 +587,9 @@ struct platform_device_id { #define MDIO_NAME_SIZE 32 #define MDIO_MODULE_PREFIX "mdio:" -#define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d" +#define MDIO_ID_FMT "%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u" #define MDIO_ID_ARGS(_id) \ - (_id)>>31, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ + ((_id)>>31) & 1, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ ((_id)>>27) & 1, ((_id)>>26) & 1, ((_id)>>25) & 1, ((_id)>>24) & 1, \ ((_id)>>23) & 1, ((_id)>>22) & 1, ((_id)>>21) & 1, ((_id)>>20) & 1, \ ((_id)>>19) & 1, ((_id)>>18) & 1, ((_id)>>17) & 1, ((_id)>>16) & 1, \ -- cgit v1.2.3 From 7d49a32a66d2215c5b3bf9bc67c9036ea9904111 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Dec 2019 23:24:52 +0000 Subject: net: phy: ensure that phy IDs are correctly typed PHY IDs are 32-bit unsigned quantities. Ensure that they are always treated as such, and not passed around as "int"s. Fixes: 13d0ab6750b2 ("net: phy: check return code when requesting PHY driver module") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 5032d453ac66..dd4a91f1feaa 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1000,7 +1000,7 @@ int phy_modify_paged_changed(struct phy_device *phydev, int page, u32 regnum, int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum, u16 mask, u16 set); -struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, +struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); #if IS_ENABLED(CONFIG_PHYLIB) -- cgit v1.2.3 From 8385d756e114f2df8568e508902d5f9850817ffb Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 13 Dec 2019 09:04:08 +0100 Subject: libata: Fix retrieving of active qcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ata_qc_complete_multiple() is called with a mask of the still active tags. mv_sata doesn't have this information directly and instead calculates the still active tags from the started tags (ap->qc_active) and the finished tags as (ap->qc_active ^ done_mask) Since 28361c40368 the hw_tag and tag are no longer the same and the equation is no longer valid. In ata_exec_internal_sg() ap->qc_active is initialized as 1ULL << ATA_TAG_INTERNAL, but in hardware tag 0 is started and this will be in done_mask on completion. ap->qc_active ^ done_mask becomes 0x100000000 ^ 0x1 = 0x100000001 and thus tag 0 used as the internal tag will never be reported as completed. This is fixed by introducing ata_qc_get_active() which returns the active hardware tags and calling it where appropriate. This is tested on mv_sata, but sata_fsl and sata_nv suffer from the same problem. There is another case in sata_nv that most likely needs fixing as well, but this looks a little different, so I wasn't confident enough to change that. Fixes: 28361c403683 ("libata: add extra internal command") Cc: stable@vger.kernel.org Tested-by: Pali Rohár Signed-off-by: Sascha Hauer Add missing export of ata_qc_get_active(), as per Pali. Signed-off-by: Jens Axboe --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index d3bbfddf616a..2dbde119721d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1175,6 +1175,7 @@ extern unsigned int ata_do_dev_read_id(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active); +extern u64 ata_qc_get_active(struct ata_port *ap); extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd); extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, -- cgit v1.2.3 From 84b032dbfdf1c139cd2b864e43959510646975f8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 10 Dec 2019 10:53:44 -0800 Subject: ata: libahci_platform: Export again ahci_platform_able_phys() This reverts commit 6bb86fefa086faba7b60bb452300b76a47cde1a5 ("libahci_platform: Staticize ahci_platform_able_phys()") we are going to need ahci_platform_{enable,disable}_phys() in a subsequent commit for ahci_brcm.c in order to properly control the PHY initialization order. Also make sure the function prototypes are declared in include/linux/ahci_platform.h as a result. Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Florian Fainelli Signed-off-by: Jens Axboe --- include/linux/ahci_platform.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 6782f0d45ebe..49e5383d4222 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -19,6 +19,8 @@ struct ahci_host_priv; struct platform_device; struct scsi_host_template; +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv); +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv); int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); -- cgit v1.2.3