From 069e2b351de67e7a837b15b3d26c65c19b790cc3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 14 Jun 2013 19:55:13 +0000 Subject: slob: Rework #ifdeffery in slab.h Make the SLOB specific stuff harmonize more with the way the other allocators do it. Create the typical kmalloc constants for that purpose. SLOB does not support it but the constants help us avoid #ifdefs. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slab.h | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 0c621752caa6..9690c14eb7fb 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -169,11 +169,7 @@ struct kmem_cache { struct list_head list; /* List of all slab caches on the system */ }; -#define KMALLOC_MAX_SIZE (1UL << 30) - -#include - -#else /* CONFIG_SLOB */ +#endif /* CONFIG_SLOB */ /* * Kmalloc array related definitions @@ -195,7 +191,9 @@ struct kmem_cache { #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif -#else +#endif + +#ifdef CONFIG_SLUB /* * SLUB allocates up to order 2 pages directly and otherwise * passes the request to the page allocator. @@ -207,6 +205,19 @@ struct kmem_cache { #endif #endif +#ifdef CONFIG_SLOB +/* + * SLOB passes all page size and larger requests to the page allocator. + * No kmalloc array is necessary since objects of different sizes can + * be allocated from the same page. + */ +#define KMALLOC_SHIFT_MAX 30 +#define KMALLOC_SHIFT_HIGH PAGE_SHIFT +#ifndef KMALLOC_SHIFT_LOW +#define KMALLOC_SHIFT_LOW 3 +#endif +#endif + /* Maximum allocatable size */ #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) /* Maximum size for which we actually use a slab cache */ @@ -221,6 +232,7 @@ struct kmem_cache { #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif +#ifndef CONFIG_SLOB extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; #ifdef CONFIG_ZONE_DMA extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; @@ -275,13 +287,18 @@ static __always_inline int kmalloc_index(size_t size) /* Will never be reached. Needed because the compiler may complain */ return -1; } +#endif /* !CONFIG_SLOB */ #ifdef CONFIG_SLAB #include -#elif defined(CONFIG_SLUB) +#endif + +#ifdef CONFIG_SLUB #include -#else -#error "Unknown slab allocator" +#endif + +#ifdef CONFIG_SLOB +#include #endif /* @@ -291,6 +308,7 @@ static __always_inline int kmalloc_index(size_t size) */ static __always_inline int kmalloc_size(int n) { +#ifndef CONFIG_SLOB if (n > 2) return 1 << n; @@ -299,10 +317,9 @@ static __always_inline int kmalloc_size(int n) if (n == 2 && KMALLOC_MIN_SIZE <= 64) return 192; - +#endif return 0; } -#endif /* !CONFIG_SLOB */ /* * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. -- cgit v1.2.3 From 3c00ea82c724fab0b98f15428a804cb45eb9ad38 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 May 2013 20:45:15 +0200 Subject: watchdog: Rename confusing state variable We have two very conflicting state variable names in the watchdog: * watchdog_enabled: This one reflects the user interface. It's set to 1 by default and can be overriden with boot options or sysctl/procfs interface. * watchdog_disabled: This is the internal toggle state that tells if watchdog threads, timers and NMI events are currently running or not. This state mostly depends on the user settings. It's a convenient state latch. Now we really need to find clearer names because those are just too confusing to encourage deep review. watchdog_enabled now becomes watchdog_user_enabled to reflect its purpose as an interface. watchdog_disabled becomes watchdog_running to suggest its role as a pure internal state. Signed-off-by: Frederic Weisbecker Cc: Srivatsa S. Bhat Cc: Anish Singh Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Don Zickus --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index db50840e6355..6a45fb583ff1 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -46,7 +46,7 @@ static inline bool trigger_all_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); -extern int watchdog_enabled; +extern int watchdog_user_enabled; extern int watchdog_thresh; struct ctl_table; extern int proc_dowatchdog(struct ctl_table *, int , -- cgit v1.2.3 From 8d36eb01da5d371feffa280e501377b5c450f5a5 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 29 May 2013 10:09:07 -0700 Subject: RDMA/cma: Define native IB address Define AF_IB and sockaddr_ib to allow the rdma_cm to use native IB addressing. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index b10ce4b341ea..230c04bda3e2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -167,6 +167,7 @@ struct ucred { #define AF_PPPOX 24 /* PPPoX sockets */ #define AF_WANPIPE 25 /* Wanpipe API Sockets */ #define AF_LLC 26 /* Linux LLC */ +#define AF_IB 27 /* Native InfiniBand address */ #define AF_CAN 29 /* Controller Area Network */ #define AF_TIPC 30 /* TIPC sockets */ #define AF_BLUETOOTH 31 /* Bluetooth sockets */ @@ -211,6 +212,7 @@ struct ucred { #define PF_PPPOX AF_PPPOX #define PF_WANPIPE AF_WANPIPE #define PF_LLC AF_LLC +#define PF_IB AF_IB #define PF_CAN AF_CAN #define PF_TIPC AF_TIPC #define PF_BLUETOOTH AF_BLUETOOTH -- cgit v1.2.3 From 1c297a66654a3295ae87e2b7f3724d214eb2b5ec Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 1 Jul 2013 15:20:00 +0100 Subject: iio: Fix iio_channel_has_info Since the info_mask split, iio_channel_has_info() is not working correctly. info_mask_separate and info_mask_shared_by_type, it is not possible to compare them directly with the iio_chan_info_enum enum. Correct that bit using the BIT() macro. Cc: # 3.10.x Signed-off-by: Alexandre Belloni Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 8d171f427632..3d35b7023591 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -211,8 +211,8 @@ struct iio_chan_spec { static inline bool iio_channel_has_info(const struct iio_chan_spec *chan, enum iio_chan_info_enum type) { - return (chan->info_mask_separate & type) | - (chan->info_mask_shared_by_type & type); + return (chan->info_mask_separate & BIT(type)) | + (chan->info_mask_shared_by_type & BIT(type)); } #define IIO_ST(si, rb, sb, sh) \ -- cgit v1.2.3 From e7efa615ccf78394338144ff0187be331240748a Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Tue, 25 Jun 2013 18:16:55 +0200 Subject: slab: add kmalloc() to kernel API documentation At the moment, kmalloc() isn't even listed in the kernel API documentation (DocBook/kernel-api.html after running "make htmldocs"). Another issue is that the documentation for kmalloc_node() refers to kcalloc()'s documentation to describe its 'flags' parameter, while kcalloc() refered to kmalloc()'s documentation, which doesn't exist! This patch is a proposed fix for this. It also removes the documentation for kmalloc() in include/linux/slob_def.h which isn't included to generate the documentation anyway. This way, kmalloc() is described in only one place. Acked-by: Christoph Lameter Acked-by: Randy Dunlap Signed-off-by: Michael Opdenacker Signed-off-by: Pekka Enberg --- include/linux/slab.h | 18 ++++++++++++++---- include/linux/slob_def.h | 8 -------- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9690c14eb7fb..6c5cc0ea8713 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -373,9 +373,8 @@ int cache_show(struct kmem_cache *s, struct seq_file *m); void print_slabinfo_header(struct seq_file *m); /** - * kmalloc_array - allocate memory for an array. - * @n: number of elements. - * @size: element size. + * kmalloc - allocate memory + * @size: how many bytes of memory are required. * @flags: the type of memory to allocate. * * The @flags argument may be one of: @@ -422,6 +421,17 @@ void print_slabinfo_header(struct seq_file *m); * There are other flags available as well, but these are not intended * for general use, and so are not documented here. For a full list of * potential flags, always refer to linux/gfp.h. + * + * kmalloc is the normal method of allocating memory + * in the kernel. + */ +static __always_inline void *kmalloc(size_t size, gfp_t flags); + +/** + * kmalloc_array - allocate memory for an array. + * @n: number of elements. + * @size: element size. + * @flags: the type of memory to allocate (see kmalloc). */ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) { @@ -445,7 +455,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) /** * kmalloc_node - allocate memory from a specific node * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kcalloc). + * @flags: the type of memory to allocate (see kmalloc). * @node: node to allocate from. * * kmalloc() for non-local nodes, used to allocate from a specific node diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index f28e14a12e3f..095a5a4a8516 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -18,14 +18,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) return __kmalloc_node(size, flags, node); } -/** - * kmalloc - allocate memory - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kcalloc). - * - * kmalloc is the normal method of allocating memory - * in the kernel. - */ static __always_inline void *kmalloc(size_t size, gfp_t flags) { return __kmalloc_node(size, flags, NUMA_NO_NODE); -- cgit v1.2.3 From e126ba97dba9edeb6fafa3665b5f8497fc9cdf8c Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 7 Jul 2013 17:25:49 +0300 Subject: mlx5: Add driver for Mellanox Connect-IB adapters The driver is comprised of two kernel modules: mlx5_ib and mlx5_core. This partitioning resembles what we have for mlx4, except that mlx5_ib is the pci device driver and not mlx5_core. mlx5_core is essentially a library that provides general functionality that is intended to be used by other Mellanox devices that will be introduced in the future. mlx5_ib has a similar role as any hardware device under drivers/infiniband/hw. Signed-off-by: Eli Cohen Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz [ Merge in coccinelle fixes from Fengguang Wu . - Roland ] Signed-off-by: Roland Dreier --- include/linux/mlx5/cmd.h | 51 +++ include/linux/mlx5/cq.h | 165 ++++++++ include/linux/mlx5/device.h | 893 ++++++++++++++++++++++++++++++++++++++++++ include/linux/mlx5/doorbell.h | 79 ++++ include/linux/mlx5/driver.h | 769 ++++++++++++++++++++++++++++++++++++ include/linux/mlx5/qp.h | 467 ++++++++++++++++++++++ include/linux/mlx5/srq.h | 41 ++ 7 files changed, 2465 insertions(+) create mode 100644 include/linux/mlx5/cmd.h create mode 100644 include/linux/mlx5/cq.h create mode 100644 include/linux/mlx5/device.h create mode 100644 include/linux/mlx5/doorbell.h create mode 100644 include/linux/mlx5/driver.h create mode 100644 include/linux/mlx5/qp.h create mode 100644 include/linux/mlx5/srq.h (limited to 'include/linux') diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h new file mode 100644 index 000000000000..2826a4b6071e --- /dev/null +++ b/include/linux/mlx5/cmd.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_CMD_H +#define MLX5_CMD_H + +#include + +struct manage_pages_layout { + u64 ptr; + u32 reserved; + u16 num_entries; + u16 func_id; +}; + + +struct mlx5_cmd_alloc_uar_imm_out { + u32 rsvd[3]; + u32 uarn; +}; + +#endif /* MLX5_CMD_H */ diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h new file mode 100644 index 000000000000..3db67f73d96d --- /dev/null +++ b/include/linux/mlx5/cq.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_CORE_CQ_H +#define MLX5_CORE_CQ_H + +#include +#include + + +struct mlx5_core_cq { + u32 cqn; + int cqe_sz; + __be32 *set_ci_db; + __be32 *arm_db; + atomic_t refcount; + struct completion free; + unsigned vector; + int irqn; + void (*comp) (struct mlx5_core_cq *); + void (*event) (struct mlx5_core_cq *, enum mlx5_event); + struct mlx5_uar *uar; + u32 cons_index; + unsigned arm_sn; + struct mlx5_rsc_debug *dbg; + int pid; +}; + + +enum { + MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01, + MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02, + MLX5_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04, + MLX5_CQE_SYNDROME_WR_FLUSH_ERR = 0x05, + MLX5_CQE_SYNDROME_MW_BIND_ERR = 0x06, + MLX5_CQE_SYNDROME_BAD_RESP_ERR = 0x10, + MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11, + MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13, + MLX5_CQE_SYNDROME_REMOTE_OP_ERR = 0x14, + MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15, + MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22, +}; + +enum { + MLX5_CQE_OWNER_MASK = 1, + MLX5_CQE_REQ = 0, + MLX5_CQE_RESP_WR_IMM = 1, + MLX5_CQE_RESP_SEND = 2, + MLX5_CQE_RESP_SEND_IMM = 3, + MLX5_CQE_RESP_SEND_INV = 4, + MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */ + MLX5_CQE_REQ_ERR = 13, + MLX5_CQE_RESP_ERR = 14, +}; + +enum { + MLX5_CQ_MODIFY_RESEIZE = 0, + MLX5_CQ_MODIFY_MODER = 1, + MLX5_CQ_MODIFY_MAPPING = 2, +}; + +struct mlx5_cq_modify_params { + int type; + union { + struct { + u32 page_offset; + u8 log_cq_size; + } resize; + + struct { + } moder; + + struct { + } mapping; + } params; +}; + +enum { + CQE_SIZE_64 = 0, + CQE_SIZE_128 = 1, +}; + +static inline int cqe_sz_to_mlx_sz(u8 size) +{ + return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; +} + +static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) +{ + *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff); +} + +enum { + MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24, + MLX5_CQ_DB_REQ_NOT = 0 << 24 +}; + +static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, + void __iomem *uar_page, + spinlock_t *doorbell_lock) +{ + __be32 doorbell[2]; + u32 sn; + u32 ci; + + sn = cq->arm_sn & 3; + ci = cq->cons_index & 0xffffff; + + *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci); + + /* Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci); + doorbell[1] = cpu_to_be32(cq->cqn); + + mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock); +} + +int mlx5_init_cq_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); +int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_create_cq_mbox_in *in, int inlen); +int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); +int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + struct mlx5_query_cq_mbox_out *out); +int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, + int type, struct mlx5_cq_modify_params *params); +int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); +void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); + +#endif /* MLX5_CORE_CQ_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h new file mode 100644 index 000000000000..51390915e538 --- /dev/null +++ b/include/linux/mlx5/device.h @@ -0,0 +1,893 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DEVICE_H +#define MLX5_DEVICE_H + +#include +#include + +#if defined(__LITTLE_ENDIAN) +#define MLX5_SET_HOST_ENDIANNESS 0 +#elif defined(__BIG_ENDIAN) +#define MLX5_SET_HOST_ENDIANNESS 0x80 +#else +#error Host endianness not defined +#endif + +enum { + MLX5_MAX_COMMANDS = 32, + MLX5_CMD_DATA_BLOCK_SIZE = 512, + MLX5_PCI_CMD_XPORT = 7, +}; + +enum { + MLX5_EXTENDED_UD_AV = 0x80000000, +}; + +enum { + MLX5_CQ_STATE_ARMED = 9, + MLX5_CQ_STATE_ALWAYS_ARMED = 0xb, + MLX5_CQ_STATE_FIRED = 0xa, +}; + +enum { + MLX5_STAT_RATE_OFFSET = 5, +}; + +enum { + MLX5_INLINE_SEG = 0x80000000, +}; + +enum { + MLX5_PERM_LOCAL_READ = 1 << 2, + MLX5_PERM_LOCAL_WRITE = 1 << 3, + MLX5_PERM_REMOTE_READ = 1 << 4, + MLX5_PERM_REMOTE_WRITE = 1 << 5, + MLX5_PERM_ATOMIC = 1 << 6, + MLX5_PERM_UMR_EN = 1 << 7, +}; + +enum { + MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0, + MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2, + MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3, + MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6, + MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, +}; + +enum { + MLX5_ACCESS_MODE_PA = 0, + MLX5_ACCESS_MODE_MTT = 1, + MLX5_ACCESS_MODE_KLM = 2 +}; + +enum { + MLX5_MKEY_REMOTE_INVAL = 1 << 24, + MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, + MLX5_MKEY_BSF_EN = 1 << 30, + MLX5_MKEY_LEN64 = 1 << 31, +}; + +enum { + MLX5_EN_RD = (u64)1, + MLX5_EN_WR = (u64)2 +}; + +enum { + MLX5_BF_REGS_PER_PAGE = 4, + MLX5_MAX_UAR_PAGES = 1 << 8, + MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE, +}; + +enum { + MLX5_MKEY_MASK_LEN = 1ull << 0, + MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1, + MLX5_MKEY_MASK_START_ADDR = 1ull << 6, + MLX5_MKEY_MASK_PD = 1ull << 7, + MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8, + MLX5_MKEY_MASK_BSF_EN = 1ull << 12, + MLX5_MKEY_MASK_KEY = 1ull << 13, + MLX5_MKEY_MASK_QPN = 1ull << 14, + MLX5_MKEY_MASK_LR = 1ull << 17, + MLX5_MKEY_MASK_LW = 1ull << 18, + MLX5_MKEY_MASK_RR = 1ull << 19, + MLX5_MKEY_MASK_RW = 1ull << 20, + MLX5_MKEY_MASK_A = 1ull << 21, + MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, + MLX5_MKEY_MASK_FREE = 1ull << 29, +}; + +enum mlx5_event { + MLX5_EVENT_TYPE_COMP = 0x0, + + MLX5_EVENT_TYPE_PATH_MIG = 0x01, + MLX5_EVENT_TYPE_COMM_EST = 0x02, + MLX5_EVENT_TYPE_SQ_DRAINED = 0x03, + MLX5_EVENT_TYPE_SRQ_LAST_WQE = 0x13, + MLX5_EVENT_TYPE_SRQ_RQ_LIMIT = 0x14, + + MLX5_EVENT_TYPE_CQ_ERROR = 0x04, + MLX5_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MLX5_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + + MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08, + MLX5_EVENT_TYPE_PORT_CHANGE = 0x09, + MLX5_EVENT_TYPE_GPIO_EVENT = 0x15, + MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19, + + MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a, + MLX5_EVENT_TYPE_STALL_EVENT = 0x1b, + + MLX5_EVENT_TYPE_CMD = 0x0a, + MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, +}; + +enum { + MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1, + MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4, + MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5, + MLX5_PORT_CHANGE_SUBTYPE_LID = 6, + MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7, + MLX5_PORT_CHANGE_SUBTYPE_GUID = 8, + MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9, +}; + +enum { + MLX5_DEV_CAP_FLAG_RC = 1LL << 0, + MLX5_DEV_CAP_FLAG_UC = 1LL << 1, + MLX5_DEV_CAP_FLAG_UD = 1LL << 2, + MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, + MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6, + MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, + MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, + MLX5_DEV_CAP_FLAG_APM = 1LL << 17, + MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, + MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, + MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, + MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, + MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, + MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, + MLX5_DEV_CAP_FLAG_DCT = 1LL << 41, + MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46, +}; + +enum { + MLX5_OPCODE_NOP = 0x00, + MLX5_OPCODE_SEND_INVAL = 0x01, + MLX5_OPCODE_RDMA_WRITE = 0x08, + MLX5_OPCODE_RDMA_WRITE_IMM = 0x09, + MLX5_OPCODE_SEND = 0x0a, + MLX5_OPCODE_SEND_IMM = 0x0b, + MLX5_OPCODE_RDMA_READ = 0x10, + MLX5_OPCODE_ATOMIC_CS = 0x11, + MLX5_OPCODE_ATOMIC_FA = 0x12, + MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14, + MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15, + MLX5_OPCODE_BIND_MW = 0x18, + MLX5_OPCODE_CONFIG_CMD = 0x1f, + + MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, + MLX5_RECV_OPCODE_SEND = 0x01, + MLX5_RECV_OPCODE_SEND_IMM = 0x02, + MLX5_RECV_OPCODE_SEND_INVAL = 0x03, + + MLX5_CQE_OPCODE_ERROR = 0x1e, + MLX5_CQE_OPCODE_RESIZE = 0x16, + + MLX5_OPCODE_SET_PSV = 0x20, + MLX5_OPCODE_GET_PSV = 0x21, + MLX5_OPCODE_CHECK_PSV = 0x22, + MLX5_OPCODE_RGET_PSV = 0x26, + MLX5_OPCODE_RCHECK_PSV = 0x27, + + MLX5_OPCODE_UMR = 0x25, + +}; + +enum { + MLX5_SET_PORT_RESET_QKEY = 0, + MLX5_SET_PORT_GUID0 = 16, + MLX5_SET_PORT_NODE_GUID = 17, + MLX5_SET_PORT_SYS_GUID = 18, + MLX5_SET_PORT_GID_TABLE = 19, + MLX5_SET_PORT_PKEY_TABLE = 20, +}; + +enum { + MLX5_MAX_PAGE_SHIFT = 31 +}; + +struct mlx5_inbox_hdr { + __be16 opcode; + u8 rsvd[4]; + __be16 opmod; +}; + +struct mlx5_outbox_hdr { + u8 status; + u8 rsvd[3]; + __be32 syndrome; +}; + +struct mlx5_cmd_query_adapter_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_query_adapter_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[24]; + u8 intapin; + u8 rsvd1[13]; + __be16 vsd_vendor_id; + u8 vsd[208]; + u8 vsd_psid[16]; +}; + +struct mlx5_hca_cap { + u8 rsvd1[16]; + u8 log_max_srq_sz; + u8 log_max_qp_sz; + u8 rsvd2; + u8 log_max_qp; + u8 log_max_strq_sz; + u8 log_max_srqs; + u8 rsvd4[2]; + u8 rsvd5; + u8 log_max_cq_sz; + u8 rsvd6; + u8 log_max_cq; + u8 log_max_eq_sz; + u8 log_max_mkey; + u8 rsvd7; + u8 log_max_eq; + u8 max_indirection; + u8 log_max_mrw_sz; + u8 log_max_bsf_list_sz; + u8 log_max_klm_list_sz; + u8 rsvd_8_0; + u8 log_max_ra_req_dc; + u8 rsvd_8_1; + u8 log_max_ra_res_dc; + u8 rsvd9; + u8 log_max_ra_req_qp; + u8 rsvd10; + u8 log_max_ra_res_qp; + u8 rsvd11[4]; + __be16 max_qp_count; + __be16 rsvd12; + u8 rsvd13; + u8 local_ca_ack_delay; + u8 rsvd14; + u8 num_ports; + u8 log_max_msg; + u8 rsvd15[3]; + __be16 stat_rate_support; + u8 rsvd16[2]; + __be64 flags; + u8 rsvd17; + u8 uar_sz; + u8 rsvd18; + u8 log_pg_sz; + __be16 bf_log_bf_reg_size; + u8 rsvd19[4]; + __be16 max_desc_sz_sq; + u8 rsvd20[2]; + __be16 max_desc_sz_rq; + u8 rsvd21[2]; + __be16 max_desc_sz_sq_dc; + u8 rsvd22[4]; + __be16 max_qp_mcg; + u8 rsvd23; + u8 log_max_mcg; + u8 rsvd24; + u8 log_max_pd; + u8 rsvd25; + u8 log_max_xrcd; + u8 rsvd26[40]; + __be32 uar_page_sz; + u8 rsvd27[28]; + u8 log_msx_atomic_size_qp; + u8 rsvd28[2]; + u8 log_msx_atomic_size_dc; + u8 rsvd29[76]; +}; + + +struct mlx5_cmd_query_hca_cap_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + + +struct mlx5_cmd_query_hca_cap_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_hca_cap hca_cap; +}; + + +struct mlx5_cmd_set_hca_cap_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; + struct mlx5_hca_cap hca_cap; +}; + + +struct mlx5_cmd_set_hca_cap_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + + +struct mlx5_cmd_init_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 profile; + u8 rsvd1[4]; +}; + +struct mlx5_cmd_init_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_teardown_hca_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 profile; + u8 rsvd1[4]; +}; + +struct mlx5_cmd_teardown_hca_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cmd_layout { + u8 type; + u8 rsvd0[3]; + __be32 inlen; + __be64 in_ptr; + __be32 in[4]; + __be32 out[4]; + __be64 out_ptr; + __be32 outlen; + u8 token; + u8 sig; + u8 rsvd1; + u8 status_own; +}; + + +struct health_buffer { + __be32 assert_var[5]; + __be32 rsvd0[3]; + __be32 assert_exit_ptr; + __be32 assert_callra; + __be32 rsvd1[2]; + __be32 fw_ver; + __be32 hw_id; + __be32 rsvd2; + u8 irisc_index; + u8 synd; + __be16 ext_sync; +}; + +struct mlx5_init_seg { + __be32 fw_rev; + __be32 cmdif_rev_fw_sub; + __be32 rsvd0[2]; + __be32 cmdq_addr_h; + __be32 cmdq_addr_l_sz; + __be32 cmd_dbell; + __be32 rsvd1[121]; + struct health_buffer health; + __be32 rsvd2[884]; + __be32 health_counter; + __be32 rsvd3[1023]; + __be64 ieee1588_clk; + __be32 ieee1588_clk_type; + __be32 clr_intx; +}; + +struct mlx5_eqe_comp { + __be32 reserved[6]; + __be32 cqn; +}; + +struct mlx5_eqe_qp_srq { + __be32 reserved[6]; + __be32 qp_srq_n; +}; + +struct mlx5_eqe_cq_err { + __be32 cqn; + u8 reserved1[7]; + u8 syndrome; +}; + +struct mlx5_eqe_dropped_packet { +}; + +struct mlx5_eqe_port_state { + u8 reserved0[8]; + u8 port; +}; + +struct mlx5_eqe_gpio { + __be32 reserved0[2]; + __be64 gpio_event; +}; + +struct mlx5_eqe_congestion { + u8 type; + u8 rsvd0; + u8 congestion_level; +}; + +struct mlx5_eqe_stall_vl { + u8 rsvd0[3]; + u8 port_vl; +}; + +struct mlx5_eqe_cmd { + __be32 vector; + __be32 rsvd[6]; +}; + +struct mlx5_eqe_page_req { + u8 rsvd0[2]; + __be16 func_id; + u8 rsvd1[2]; + __be16 num_pages; + __be32 rsvd2[5]; +}; + +union ev_data { + __be32 raw[7]; + struct mlx5_eqe_cmd cmd; + struct mlx5_eqe_comp comp; + struct mlx5_eqe_qp_srq qp_srq; + struct mlx5_eqe_cq_err cq_err; + struct mlx5_eqe_dropped_packet dp; + struct mlx5_eqe_port_state port; + struct mlx5_eqe_gpio gpio; + struct mlx5_eqe_congestion cong; + struct mlx5_eqe_stall_vl stall_vl; + struct mlx5_eqe_page_req req_pages; +} __packed; + +struct mlx5_eqe { + u8 rsvd0; + u8 type; + u8 rsvd1; + u8 sub_type; + __be32 rsvd2[7]; + union ev_data data; + __be16 rsvd3; + u8 signature; + u8 owner; +} __packed; + +struct mlx5_cmd_prot_block { + u8 data[MLX5_CMD_DATA_BLOCK_SIZE]; + u8 rsvd0[48]; + __be64 next; + __be32 block_num; + u8 rsvd1; + u8 token; + u8 ctrl_sig; + u8 sig; +}; + +struct mlx5_err_cqe { + u8 rsvd0[32]; + __be32 srqn; + u8 rsvd1[18]; + u8 vendor_err_synd; + u8 syndrome; + __be32 s_wqe_opcode_qpn; + __be16 wqe_counter; + u8 signature; + u8 op_own; +}; + +struct mlx5_cqe64 { + u8 rsvd0[17]; + u8 ml_path; + u8 rsvd20[4]; + __be16 slid; + __be32 flags_rqpn; + u8 rsvd28[4]; + __be32 srqn; + __be32 imm_inval_pkey; + u8 rsvd40[4]; + __be32 byte_cnt; + __be64 timestamp; + __be32 sop_drop_qpn; + __be16 wqe_counter; + u8 signature; + u8 op_own; +}; + +struct mlx5_wqe_srq_next_seg { + u8 rsvd0[2]; + __be16 next_wqe_index; + u8 signature; + u8 rsvd1[11]; +}; + +union mlx5_ext_cqe { + struct ib_grh grh; + u8 inl[64]; +}; + +struct mlx5_cqe128 { + union mlx5_ext_cqe inl_grh; + struct mlx5_cqe64 cqe64; +}; + +struct mlx5_srq_ctx { + u8 state_log_sz; + u8 rsvd0[3]; + __be32 flags_xrcd; + __be32 pgoff_cqn; + u8 rsvd1[4]; + u8 log_pg_sz; + u8 rsvd2[7]; + __be32 pd; + __be16 lwm; + __be16 wqe_cnt; + u8 rsvd3[8]; + __be64 db_record; +}; + +struct mlx5_create_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_srqn; + u8 rsvd0[4]; + struct mlx5_srq_ctx ctx; + u8 rsvd1[208]; + __be64 pas[0]; +}; + +struct mlx5_create_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 srqn; + u8 rsvd[4]; +}; + +struct mlx5_destroy_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + u8 rsvd[4]; +}; + +struct mlx5_destroy_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + u8 rsvd0[4]; +}; + +struct mlx5_query_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_srq_ctx ctx; + u8 rsvd1[32]; + __be64 pas[0]; +}; + +struct mlx5_arm_srq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 srqn; + __be16 rsvd; + __be16 lwm; +}; + +struct mlx5_arm_srq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_cq_context { + u8 status; + u8 cqe_sz_flags; + u8 st; + u8 rsvd3; + u8 rsvd4[6]; + __be16 page_offset; + __be32 log_sz_usr_page; + __be16 cq_period; + __be16 cq_max_count; + __be16 rsvd20; + __be16 c_eqn; + u8 log_pg_sz; + u8 rsvd25[7]; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_counter; + __be32 producer_counter; + u8 rsvd48[8]; + __be64 db_record_addr; +}; + +struct mlx5_create_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_cqn; + u8 rsvdx[4]; + struct mlx5_cq_context ctx; + u8 rsvd6[192]; + __be64 pas[0]; +}; + +struct mlx5_create_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_query_cq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 cqn; + u8 rsvd0[4]; +}; + +struct mlx5_query_cq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; + struct mlx5_cq_context ctx; + u8 rsvd6[16]; + __be64 pas[0]; +}; + +struct mlx5_eq_context { + u8 status; + u8 ec_oi; + u8 st; + u8 rsvd2[7]; + __be16 page_pffset; + __be32 log_sz_usr_page; + u8 rsvd3[7]; + u8 intr; + u8 log_page_size; + u8 rsvd4[15]; + __be32 consumer_counter; + __be32 produser_counter; + u8 rsvd5[16]; +}; + +struct mlx5_create_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 input_eqn; + u8 rsvd1[4]; + struct mlx5_eq_context ctx; + u8 rsvd2[8]; + __be64 events_mask; + u8 rsvd3[176]; + __be64 pas[0]; +}; + +struct mlx5_create_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[3]; + u8 eq_number; + u8 rsvd1[4]; +}; + +struct mlx5_destroy_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 eqn; + u8 rsvd1[4]; +}; + +struct mlx5_destroy_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_map_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + __be64 mask; + u8 mu; + u8 rsvd0[2]; + u8 eqn; + u8 rsvd1[24]; +}; + +struct mlx5_map_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_eq_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[3]; + u8 eqn; + u8 rsvd1[4]; +}; + +struct mlx5_query_eq_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + struct mlx5_eq_context ctx; +}; + +struct mlx5_mkey_seg { + /* This is a two bit field occupying bits 31-30. + * bit 31 is always 0, + * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation + */ + u8 status; + u8 pcie_control; + u8 flags; + u8 version; + __be32 qpn_mkey7_0; + u8 rsvd1[4]; + __be32 flags_pd; + __be64 start_addr; + __be64 len; + __be32 bsfs_octo_size; + u8 rsvd2[16]; + __be32 xlt_oct_size; + u8 rsvd3[3]; + u8 log2_page_size; + u8 rsvd4[4]; +}; + +struct mlx5_query_special_ctxs_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_special_ctxs_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 dump_fill_mkey; + __be32 reserved_lkey; +}; + +struct mlx5_create_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_mkey_index; + u8 rsvd0[4]; + struct mlx5_mkey_seg seg; + u8 rsvd1[16]; + __be32 xlat_oct_act_size; + __be32 bsf_coto_act_size; + u8 rsvd2[168]; + __be64 pas[0]; +}; + +struct mlx5_create_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 mkey; + u8 rsvd[4]; +}; + +struct mlx5_destroy_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; + u8 rsvd[4]; +}; + +struct mlx5_destroy_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_query_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; +}; + +struct mlx5_query_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be64 pas[0]; +}; + +struct mlx5_modify_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 mkey; + __be64 pas[0]; +}; + +struct mlx5_modify_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; +}; + +struct mlx5_dump_mkey_mbox_in { + struct mlx5_inbox_hdr hdr; +}; + +struct mlx5_dump_mkey_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 mkey; +}; + +struct mlx5_mad_ifc_mbox_in { + struct mlx5_inbox_hdr hdr; + __be16 remote_lid; + u8 rsvd0; + u8 port; + u8 rsvd1[4]; + u8 data[256]; +}; + +struct mlx5_mad_ifc_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + u8 data[256]; +}; + +struct mlx5_access_reg_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd0[2]; + __be16 register_id; + __be32 arg; + __be32 data[0]; +}; + +struct mlx5_access_reg_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; + __be32 data[0]; +}; + +#define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) + +enum { + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 +}; + +#endif /* MLX5_DEVICE_H */ diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h new file mode 100644 index 000000000000..163a818411e7 --- /dev/null +++ b/include/linux/mlx5/doorbell.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DOORBELL_H +#define MLX5_DOORBELL_H + +#define MLX5_BF_OFFSET 0x800 +#define MLX5_CQ_DOORBELL 0x20 + +#if BITS_PER_LONG == 64 +/* Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. + */ + +#define MLX5_DECLARE_DOORBELL_LOCK(name) +#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0) +#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mlx5_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + __raw_writeq(*(u64 *)val, dest); +} + +#else + +/* Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ + +#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mlx5_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + unsigned long flags; + + spin_lock_irqsave(doorbell_lock, flags); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); + spin_unlock_irqrestore(doorbell_lock, flags); +} + +#endif + +#endif /* MLX5_DOORBELL_H */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h new file mode 100644 index 000000000000..e47f1e4c9b03 --- /dev/null +++ b/include/linux/mlx5/driver.h @@ -0,0 +1,769 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_DRIVER_H +#define MLX5_DRIVER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + MLX5_BOARD_ID_LEN = 64, + MLX5_MAX_NAME_LEN = 16, +}; + +enum { + /* one minute for the sake of bringup. Generally, commands must always + * complete and we may need to increase this timeout value + */ + MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000, + MLX5_CMD_WQ_MAX_NAME = 32, +}; + +enum { + CMD_OWNER_SW = 0x0, + CMD_OWNER_HW = 0x1, + CMD_STATUS_SUCCESS = 0, +}; + +enum mlx5_sqp_t { + MLX5_SQP_SMI = 0, + MLX5_SQP_GSI = 1, + MLX5_SQP_IEEE_1588 = 2, + MLX5_SQP_SNIFFER = 3, + MLX5_SQP_SYNC_UMR = 4, +}; + +enum { + MLX5_MAX_PORTS = 2, +}; + +enum { + MLX5_EQ_VEC_PAGES = 0, + MLX5_EQ_VEC_CMD = 1, + MLX5_EQ_VEC_ASYNC = 2, + MLX5_EQ_VEC_COMP_BASE, +}; + +enum { + MLX5_MAX_EQ_NAME = 20 +}; + +enum { + MLX5_ATOMIC_MODE_IB_COMP = 1 << 16, + MLX5_ATOMIC_MODE_CX = 2 << 16, + MLX5_ATOMIC_MODE_8B = 3 << 16, + MLX5_ATOMIC_MODE_16B = 4 << 16, + MLX5_ATOMIC_MODE_32B = 5 << 16, + MLX5_ATOMIC_MODE_64B = 6 << 16, + MLX5_ATOMIC_MODE_128B = 7 << 16, + MLX5_ATOMIC_MODE_256B = 8 << 16, +}; + +enum { + MLX5_CMD_OP_QUERY_HCA_CAP = 0x100, + MLX5_CMD_OP_QUERY_ADAPTER = 0x101, + MLX5_CMD_OP_INIT_HCA = 0x102, + MLX5_CMD_OP_TEARDOWN_HCA = 0x103, + MLX5_CMD_OP_QUERY_PAGES = 0x107, + MLX5_CMD_OP_MANAGE_PAGES = 0x108, + MLX5_CMD_OP_SET_HCA_CAP = 0x109, + + MLX5_CMD_OP_CREATE_MKEY = 0x200, + MLX5_CMD_OP_QUERY_MKEY = 0x201, + MLX5_CMD_OP_DESTROY_MKEY = 0x202, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203, + + MLX5_CMD_OP_CREATE_EQ = 0x301, + MLX5_CMD_OP_DESTROY_EQ = 0x302, + MLX5_CMD_OP_QUERY_EQ = 0x303, + + MLX5_CMD_OP_CREATE_CQ = 0x400, + MLX5_CMD_OP_DESTROY_CQ = 0x401, + MLX5_CMD_OP_QUERY_CQ = 0x402, + MLX5_CMD_OP_MODIFY_CQ = 0x403, + + MLX5_CMD_OP_CREATE_QP = 0x500, + MLX5_CMD_OP_DESTROY_QP = 0x501, + MLX5_CMD_OP_RST2INIT_QP = 0x502, + MLX5_CMD_OP_INIT2RTR_QP = 0x503, + MLX5_CMD_OP_RTR2RTS_QP = 0x504, + MLX5_CMD_OP_RTS2RTS_QP = 0x505, + MLX5_CMD_OP_SQERR2RTS_QP = 0x506, + MLX5_CMD_OP_2ERR_QP = 0x507, + MLX5_CMD_OP_RTS2SQD_QP = 0x508, + MLX5_CMD_OP_SQD2RTS_QP = 0x509, + MLX5_CMD_OP_2RST_QP = 0x50a, + MLX5_CMD_OP_QUERY_QP = 0x50b, + MLX5_CMD_OP_CONF_SQP = 0x50c, + MLX5_CMD_OP_MAD_IFC = 0x50d, + MLX5_CMD_OP_INIT2INIT_QP = 0x50e, + MLX5_CMD_OP_SUSPEND_QP = 0x50f, + MLX5_CMD_OP_UNSUSPEND_QP = 0x510, + MLX5_CMD_OP_SQD2SQD_QP = 0x511, + MLX5_CMD_OP_ALLOC_QP_COUNTER_SET = 0x512, + MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET = 0x513, + MLX5_CMD_OP_QUERY_QP_COUNTER_SET = 0x514, + + MLX5_CMD_OP_CREATE_PSV = 0x600, + MLX5_CMD_OP_DESTROY_PSV = 0x601, + MLX5_CMD_OP_QUERY_PSV = 0x602, + MLX5_CMD_OP_QUERY_SIG_RULE_TABLE = 0x603, + MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE = 0x604, + + MLX5_CMD_OP_CREATE_SRQ = 0x700, + MLX5_CMD_OP_DESTROY_SRQ = 0x701, + MLX5_CMD_OP_QUERY_SRQ = 0x702, + MLX5_CMD_OP_ARM_RQ = 0x703, + MLX5_CMD_OP_RESIZE_SRQ = 0x704, + + MLX5_CMD_OP_ALLOC_PD = 0x800, + MLX5_CMD_OP_DEALLOC_PD = 0x801, + MLX5_CMD_OP_ALLOC_UAR = 0x802, + MLX5_CMD_OP_DEALLOC_UAR = 0x803, + + MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, + MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, + + + MLX5_CMD_OP_ALLOC_XRCD = 0x80e, + MLX5_CMD_OP_DEALLOC_XRCD = 0x80f, + + MLX5_CMD_OP_ACCESS_REG = 0x805, + MLX5_CMD_OP_MAX = 0x810, +}; + +enum { + MLX5_REG_PCAP = 0x5001, + MLX5_REG_PMTU = 0x5003, + MLX5_REG_PTYS = 0x5004, + MLX5_REG_PAOS = 0x5006, + MLX5_REG_PMAOS = 0x5012, + MLX5_REG_PUDE = 0x5009, + MLX5_REG_PMPE = 0x5010, + MLX5_REG_PELC = 0x500e, + MLX5_REG_PMLP = 0, /* TBD */ + MLX5_REG_NODE_DESC = 0x6001, + MLX5_REG_HOST_ENDIANNESS = 0x7004, +}; + +enum dbg_rsc_type { + MLX5_DBG_RSC_QP, + MLX5_DBG_RSC_EQ, + MLX5_DBG_RSC_CQ, +}; + +struct mlx5_field_desc { + struct dentry *dent; + int i; +}; + +struct mlx5_rsc_debug { + struct mlx5_core_dev *dev; + void *object; + enum dbg_rsc_type type; + struct dentry *root; + struct mlx5_field_desc fields[0]; +}; + +enum mlx5_dev_event { + MLX5_DEV_EVENT_SYS_ERROR, + MLX5_DEV_EVENT_PORT_UP, + MLX5_DEV_EVENT_PORT_DOWN, + MLX5_DEV_EVENT_PORT_INITIALIZED, + MLX5_DEV_EVENT_LID_CHANGE, + MLX5_DEV_EVENT_PKEY_CHANGE, + MLX5_DEV_EVENT_GUID_CHANGE, + MLX5_DEV_EVENT_CLIENT_REREG, +}; + +struct mlx5_uuar_info { + struct mlx5_uar *uars; + int num_uars; + int num_low_latency_uuars; + unsigned long *bitmap; + unsigned int *count; + struct mlx5_bf *bfs; + + /* + * protect uuar allocation data structs + */ + struct mutex lock; +}; + +struct mlx5_bf { + void __iomem *reg; + void __iomem *regreg; + int buf_size; + struct mlx5_uar *uar; + unsigned long offset; + int need_lock; + /* protect blue flame buffer selection when needed + */ + spinlock_t lock; + + /* serialize 64 bit writes when done as two 32 bit accesses + */ + spinlock_t lock32; + int uuarn; +}; + +struct mlx5_cmd_first { + __be32 data[4]; +}; + +struct mlx5_cmd_msg { + struct list_head list; + struct cache_ent *cache; + u32 len; + struct mlx5_cmd_first first; + struct mlx5_cmd_mailbox *next; +}; + +struct mlx5_cmd_debug { + struct dentry *dbg_root; + struct dentry *dbg_in; + struct dentry *dbg_out; + struct dentry *dbg_outlen; + struct dentry *dbg_status; + struct dentry *dbg_run; + void *in_msg; + void *out_msg; + u8 status; + u16 inlen; + u16 outlen; +}; + +struct cache_ent { + /* protect block chain allocations + */ + spinlock_t lock; + struct list_head head; +}; + +struct cmd_msg_cache { + struct cache_ent large; + struct cache_ent med; + +}; + +struct mlx5_cmd_stats { + u64 sum; + u64 n; + struct dentry *root; + struct dentry *avg; + struct dentry *count; + /* protect command average calculations */ + spinlock_t lock; +}; + +struct mlx5_cmd { + void *cmd_buf; + dma_addr_t dma; + u16 cmdif_rev; + u8 log_sz; + u8 log_stride; + int max_reg_cmds; + int events; + u32 __iomem *vector; + + /* protect command queue allocations + */ + spinlock_t alloc_lock; + + /* protect token allocations + */ + spinlock_t token_lock; + u8 token; + unsigned long bitmask; + char wq_name[MLX5_CMD_WQ_MAX_NAME]; + struct workqueue_struct *wq; + struct semaphore sem; + struct semaphore pages_sem; + int mode; + struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS]; + struct pci_pool *pool; + struct mlx5_cmd_debug dbg; + struct cmd_msg_cache cache; + int checksum_disabled; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; +}; + +struct mlx5_port_caps { + int gid_table_len; + int pkey_table_len; +}; + +struct mlx5_caps { + u8 log_max_eq; + u8 log_max_cq; + u8 log_max_qp; + u8 log_max_mkey; + u8 log_max_pd; + u8 log_max_srq; + u32 max_cqes; + int max_wqes; + int max_sq_desc_sz; + int max_rq_desc_sz; + u64 flags; + u16 stat_rate_support; + int log_max_msg; + int num_ports; + int max_ra_res_qp; + int max_ra_req_qp; + int max_srq_wqes; + int bf_reg_size; + int bf_regs_per_page; + struct mlx5_port_caps port[MLX5_MAX_PORTS]; + u8 ext_port_cap[MLX5_MAX_PORTS]; + int max_vf; + u32 reserved_lkey; + u8 local_ca_ack_delay; + u8 log_max_mcg; + u16 max_qp_mcg; + int min_page_sz; +}; + +struct mlx5_cmd_mailbox { + void *buf; + dma_addr_t dma; + struct mlx5_cmd_mailbox *next; +}; + +struct mlx5_buf_list { + void *buf; + dma_addr_t map; +}; + +struct mlx5_buf { + struct mlx5_buf_list direct; + struct mlx5_buf_list *page_list; + int nbufs; + int npages; + int page_shift; + int size; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_buf buf; + int size; + u8 irqn; + u8 eqn; + int nent; + u64 mask; + char name[MLX5_MAX_EQ_NAME]; + struct list_head list; + int index; + struct mlx5_rsc_debug *dbg; +}; + + +struct mlx5_core_mr { + u64 iova; + u64 size; + u32 key; + u32 pd; + u32 access; +}; + +struct mlx5_core_srq { + u32 srqn; + int max; + int max_gs; + int max_avail_gather; + int wqe_shift; + void (*event) (struct mlx5_core_srq *, enum mlx5_event); + + atomic_t refcount; + struct completion free; +}; + +struct mlx5_eq_table { + void __iomem *update_ci; + void __iomem *update_arm_ci; + struct list_head *comp_eq_head; + struct mlx5_eq pages_eq; + struct mlx5_eq async_eq; + struct mlx5_eq cmd_eq; + struct msix_entry *msix_arr; + int num_comp_vectors; + /* protect EQs list + */ + spinlock_t lock; +}; + +struct mlx5_uar { + u32 index; + struct list_head bf_list; + unsigned free_bf_bmap; + void __iomem *wc_map; + void __iomem *map; +}; + + +struct mlx5_core_health { + struct health_buffer __iomem *health; + __be32 __iomem *health_counter; + struct timer_list timer; + struct list_head list; + u32 prev; + int miss_counter; +}; + +struct mlx5_cq_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_qp_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_srq_table { + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +struct mlx5_priv { + char name[MLX5_MAX_NAME_LEN]; + struct mlx5_eq_table eq_table; + struct mlx5_uuar_info uuari; + MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); + + /* pages stuff */ + struct workqueue_struct *pg_wq; + struct rb_root page_root; + int fw_pages; + int reg_pages; + + struct mlx5_core_health health; + + struct mlx5_srq_table srq_table; + + /* start: qp staff */ + struct mlx5_qp_table qp_table; + struct dentry *qp_debugfs; + struct dentry *eq_debugfs; + struct dentry *cq_debugfs; + struct dentry *cmdif_debugfs; + /* end: qp staff */ + + /* start: cq staff */ + struct mlx5_cq_table cq_table; + /* end: cq staff */ + + /* start: alloc staff */ + struct mutex pgdir_mutex; + struct list_head pgdir_list; + /* end: alloc staff */ + struct dentry *dbg_root; + + /* protect mkey key part */ + spinlock_t mkey_lock; + u8 mkey_key; +}; + +struct mlx5_core_dev { + struct pci_dev *pdev; + u8 rev_id; + char board_id[MLX5_BOARD_ID_LEN]; + struct mlx5_cmd cmd; + struct mlx5_caps caps; + phys_addr_t iseg_base; + struct mlx5_init_seg __iomem *iseg; + void (*event) (struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + void *data); + struct mlx5_priv priv; + struct mlx5_profile *profile; + atomic_t num_qps; +}; + +struct mlx5_db { + __be32 *db; + union { + struct mlx5_db_pgdir *pgdir; + struct mlx5_ib_user_db_page *user_page; + } u; + dma_addr_t dma; + int index; +}; + +enum { + MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES, +}; + +enum { + MLX5_COMP_EQ_SIZE = 1024, +}; + +struct mlx5_db_pgdir { + struct list_head list; + DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); + __be32 *db_page; + dma_addr_t db_dma; +}; + +typedef void (*mlx5_cmd_cbk_t)(int status, void *context); + +struct mlx5_cmd_work_ent { + struct mlx5_cmd_msg *in; + struct mlx5_cmd_msg *out; + mlx5_cmd_cbk_t callback; + void *context; + int idx; + struct completion done; + struct mlx5_cmd *cmd; + struct work_struct work; + struct mlx5_cmd_layout *lay; + int ret; + int page_queue; + u8 status; + u8 token; + struct timespec ts1; + struct timespec ts2; +}; + +struct mlx5_pas { + u64 pa; + u8 log_sz; +}; + +static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset) +{ + if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1)) + return buf->direct.buf + offset; + else + return buf->page_list[offset >> PAGE_SHIFT].buf + + (offset & (PAGE_SIZE - 1)); +} + +extern struct workqueue_struct *mlx5_core_wq; + +#define STRUCT_FIELD(header, field) \ + .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ + .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_bits; + int size_bits; +}; + +static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev) +{ + return pci_get_drvdata(pdev); +} + +extern struct dentry *mlx5_debugfs_root; + +static inline u16 fw_rev_maj(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->fw_rev) & 0xffff; +} + +static inline u16 fw_rev_min(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->fw_rev) >> 16; +} + +static inline u16 fw_rev_sub(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff; +} + +static inline u16 cmdif_rev(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; +} + +static inline void *mlx5_vzalloc(unsigned long size) +{ + void *rtn; + + rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (!rtn) + rtn = vzalloc(size); + return rtn; +} + +static inline void mlx5_vfree(const void *addr) +{ + if (addr && is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + +int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev); +void mlx5_dev_cleanup(struct mlx5_core_dev *dev); +int mlx5_cmd_init(struct mlx5_core_dev *dev); +void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); +void mlx5_cmd_use_events(struct mlx5_core_dev *dev); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); +int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); +int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, + int out_size); +int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); +int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); +int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); +void mlx5_health_cleanup(void); +void __init mlx5_health_init(void); +void mlx5_start_health_poll(struct mlx5_core_dev *dev); +void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct, + struct mlx5_buf *buf); +void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); +struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, + gfp_t flags, int npages); +void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, + struct mlx5_cmd_mailbox *head); +int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_create_srq_mbox_in *in, int inlen); +int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq); +int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_query_srq_mbox_out *out); +int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_create_mkey_mbox_in *in, int inlen); +int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr); +int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + struct mlx5_query_mkey_mbox_out *out, int outlen); +int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, + u32 *mkey); +int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); +int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn); +int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb, + u16 opmod, int port); +void mlx5_pagealloc_init(struct mlx5_core_dev *dev); +void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); +int mlx5_pagealloc_start(struct mlx5_core_dev *dev); +void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, + s16 npages); +int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev); +int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev); +void mlx5_register_debugfs(void); +void mlx5_unregister_debugfs(void); +int mlx5_eq_init(struct mlx5_core_dev *dev); +void mlx5_eq_cleanup(struct mlx5_core_dev *dev); +void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); +void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); +void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type); +void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); +struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector); +void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); +int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, + int nent, u64 mask, const char *name, struct mlx5_uar *uar); +int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_start_eqs(struct mlx5_core_dev *dev); +int mlx5_stop_eqs(struct mlx5_core_dev *dev); +int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); +int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); + +int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, + int size_in, void *data_out, int size_out, + u16 reg_num, int arg, int write); +int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct mlx5_query_eq_mbox_out *out, int outlen); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); +void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); + +typedef void (*health_handler_t)(struct pci_dev *pdev, void *buf, int size); +int mlx5_register_health_report_handler(health_handler_t handler); +void mlx5_unregister_health_report_handler(void); +const char *mlx5_command_str(int command); +int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); + +static inline u32 mlx5_mkey_to_idx(u32 mkey) +{ + return mkey >> 8; +} + +static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) +{ + return mkey_idx << 8; +} + +enum { + MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, + MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2, +}; + +enum { + MAX_MR_CACHE_ENTRIES = 16, +}; + +struct mlx5_profile { + u64 mask; + u32 log_max_qp; + int cmdif_csum; + struct { + int size; + int limit; + } mr_cache[MAX_MR_CACHE_ENTRIES]; +}; + +#endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h new file mode 100644 index 000000000000..d9e3eacb3a7f --- /dev/null +++ b/include/linux/mlx5/qp.h @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_QP_H +#define MLX5_QP_H + +#include +#include + +#define MLX5_INVALID_LKEY 0x100 + +enum mlx5_qp_optpar { + MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MLX5_QP_OPTPAR_RRE = 1 << 1, + MLX5_QP_OPTPAR_RAE = 1 << 2, + MLX5_QP_OPTPAR_RWE = 1 << 3, + MLX5_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MLX5_QP_OPTPAR_Q_KEY = 1 << 5, + MLX5_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, + MLX5_QP_OPTPAR_SRA_MAX = 1 << 8, + MLX5_QP_OPTPAR_RRA_MAX = 1 << 9, + MLX5_QP_OPTPAR_PM_STATE = 1 << 10, + MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13, + MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MLX5_QP_OPTPAR_PRI_PORT = 1 << 16, + MLX5_QP_OPTPAR_SRQN = 1 << 18, + MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, + MLX5_QP_OPTPAR_DC_HS = 1 << 20, + MLX5_QP_OPTPAR_DC_KEY = 1 << 21, +}; + +enum mlx5_qp_state { + MLX5_QP_STATE_RST = 0, + MLX5_QP_STATE_INIT = 1, + MLX5_QP_STATE_RTR = 2, + MLX5_QP_STATE_RTS = 3, + MLX5_QP_STATE_SQER = 4, + MLX5_QP_STATE_SQD = 5, + MLX5_QP_STATE_ERR = 6, + MLX5_QP_STATE_SQ_DRAINING = 7, + MLX5_QP_STATE_SUSPENDED = 9, + MLX5_QP_NUM_STATE +}; + +enum { + MLX5_QP_ST_RC = 0x0, + MLX5_QP_ST_UC = 0x1, + MLX5_QP_ST_UD = 0x2, + MLX5_QP_ST_XRC = 0x3, + MLX5_QP_ST_MLX = 0x4, + MLX5_QP_ST_DCI = 0x5, + MLX5_QP_ST_DCT = 0x6, + MLX5_QP_ST_QP0 = 0x7, + MLX5_QP_ST_QP1 = 0x8, + MLX5_QP_ST_RAW_ETHERTYPE = 0x9, + MLX5_QP_ST_RAW_IPV6 = 0xa, + MLX5_QP_ST_SNIFFER = 0xb, + MLX5_QP_ST_SYNC_UMR = 0xe, + MLX5_QP_ST_PTP_1588 = 0xd, + MLX5_QP_ST_REG_UMR = 0xc, + MLX5_QP_ST_MAX +}; + +enum { + MLX5_QP_PM_MIGRATED = 0x3, + MLX5_QP_PM_ARMED = 0x0, + MLX5_QP_PM_REARM = 0x1 +}; + +enum { + MLX5_NON_ZERO_RQ = 0 << 24, + MLX5_SRQ_RQ = 1 << 24, + MLX5_CRQ_RQ = 2 << 24, + MLX5_ZERO_LEN_RQ = 3 << 24 +}; + +enum { + /* params1 */ + MLX5_QP_BIT_SRE = 1 << 15, + MLX5_QP_BIT_SWE = 1 << 14, + MLX5_QP_BIT_SAE = 1 << 13, + /* params2 */ + MLX5_QP_BIT_RRE = 1 << 15, + MLX5_QP_BIT_RWE = 1 << 14, + MLX5_QP_BIT_RAE = 1 << 13, + MLX5_QP_BIT_RIC = 1 << 4, +}; + +enum { + MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, + MLX5_WQE_CTRL_SOLICITED = 1 << 1, +}; + +enum { + MLX5_SEND_WQE_BB = 64, +}; + +enum { + MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27, + MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, + MLX5_WQE_FMR_PERM_REMOTE_READ = 1 << 29, + MLX5_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, + MLX5_WQE_FMR_PERM_ATOMIC = 1 << 31 +}; + +enum { + MLX5_FENCE_MODE_NONE = 0 << 5, + MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, + MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, +}; + +enum { + MLX5_QP_LAT_SENSITIVE = 1 << 28, + MLX5_QP_ENABLE_SIG = 1 << 31, +}; + +enum { + MLX5_RCV_DBR = 0, + MLX5_SND_DBR = 1, +}; + +struct mlx5_wqe_fmr_seg { + __be32 flags; + __be32 mem_key; + __be64 buf_list; + __be64 start_addr; + __be64 reg_len; + __be32 offset; + __be32 page_size; + u32 reserved[2]; +}; + +struct mlx5_wqe_ctrl_seg { + __be32 opmod_idx_opcode; + __be32 qpn_ds; + u8 signature; + u8 rsvd[2]; + u8 fm_ce_se; + __be32 imm; +}; + +struct mlx5_wqe_xrc_seg { + __be32 xrc_srqn; + u8 rsvd[12]; +}; + +struct mlx5_wqe_masked_atomic_seg { + __be64 swap_add; + __be64 compare; + __be64 swap_add_mask; + __be64 compare_mask; +}; + +struct mlx5_av { + union { + struct { + __be32 qkey; + __be32 reserved; + } qkey; + __be64 dc_key; + } key; + __be32 dqp_dct; + u8 stat_rate_sl; + u8 fl_mlid; + __be16 rlid; + u8 reserved0[10]; + u8 tclass; + u8 hop_limit; + __be32 grh_gid_fl; + u8 rgid[16]; +}; + +struct mlx5_wqe_datagram_seg { + struct mlx5_av av; +}; + +struct mlx5_wqe_raddr_seg { + __be64 raddr; + __be32 rkey; + u32 reserved; +}; + +struct mlx5_wqe_atomic_seg { + __be64 swap_add; + __be64 compare; +}; + +struct mlx5_wqe_data_seg { + __be32 byte_count; + __be32 lkey; + __be64 addr; +}; + +struct mlx5_wqe_umr_ctrl_seg { + u8 flags; + u8 rsvd0[3]; + __be16 klm_octowords; + __be16 bsf_octowords; + __be64 mkey_mask; + u8 rsvd1[32]; +}; + +struct mlx5_seg_set_psv { + __be32 psv_num; + __be16 syndrome; + __be16 status; + __be32 transient_sig; + __be32 ref_tag; +}; + +struct mlx5_seg_get_psv { + u8 rsvd[19]; + u8 num_psv; + __be32 l_key; + __be64 va; + __be32 psv_index[4]; +}; + +struct mlx5_seg_check_psv { + u8 rsvd0[2]; + __be16 err_coalescing_op; + u8 rsvd1[2]; + __be16 xport_err_op; + u8 rsvd2[2]; + __be16 xport_err_mask; + u8 rsvd3[7]; + u8 num_psv; + __be32 l_key; + __be64 va; + __be32 psv_index[4]; +}; + +struct mlx5_rwqe_sig { + u8 rsvd0[4]; + u8 signature; + u8 rsvd1[11]; +}; + +struct mlx5_wqe_signature_seg { + u8 rsvd0[4]; + u8 signature; + u8 rsvd1[11]; +}; + +struct mlx5_wqe_inline_seg { + __be32 byte_count; +}; + +struct mlx5_core_qp { + void (*event) (struct mlx5_core_qp *, int); + int qpn; + atomic_t refcount; + struct completion free; + struct mlx5_rsc_debug *dbg; + int pid; +}; + +struct mlx5_qp_path { + u8 fl; + u8 rsvd3; + u8 free_ar; + u8 pkey_index; + u8 rsvd0; + u8 grh_mlid; + __be16 rlid; + u8 ackto_lt; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 tclass_flowlabel; + u8 rgid[16]; + u8 rsvd1[4]; + u8 sl; + u8 port; + u8 rsvd2[6]; +}; + +struct mlx5_qp_context { + __be32 flags; + __be32 flags_pd; + u8 mtu_msgmax; + u8 rq_size_stride; + __be16 sq_crq_size; + __be32 qp_counter_set_usr_page; + __be32 wire_qpn; + __be32 log_pg_sz_remote_qpn; + struct mlx5_qp_path pri_path; + struct mlx5_qp_path alt_path; + __be32 params1; + u8 reserved2[4]; + __be32 next_send_psn; + __be32 cqn_send; + u8 reserved3[8]; + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 xrcd; + __be32 cqn_recv; + __be64 db_rec_addr; + __be32 qkey; + __be32 rq_type_srqn; + __be32 rmsn; + __be16 hw_sq_wqe_counter; + __be16 sw_sq_wqe_counter; + __be16 hw_rcyclic_byte_counter; + __be16 hw_rq_counter; + __be16 sw_rcyclic_byte_counter; + __be16 sw_rq_counter; + u8 rsvd0[5]; + u8 cgs; + u8 cs_req; + u8 cs_res; + __be64 dc_access_key; + u8 rsvd1[24]; +}; + +struct mlx5_create_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 input_qpn; + u8 rsvd0[4]; + __be32 opt_param_mask; + u8 rsvd1[4]; + struct mlx5_qp_context ctx; + u8 rsvd3[16]; + __be64 pas[0]; +}; + +struct mlx5_create_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 qpn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd0[4]; +}; + +struct mlx5_destroy_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_modify_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd1[4]; + __be32 optparam; + u8 rsvd0[4]; + struct mlx5_qp_context ctx; +}; + +struct mlx5_modify_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd0[8]; +}; + +struct mlx5_query_qp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd[4]; +}; + +struct mlx5_query_qp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd1[8]; + __be32 optparam; + u8 rsvd0[4]; + struct mlx5_qp_context ctx; + u8 rsvd2[16]; + __be64 pas[0]; +}; + +struct mlx5_conf_sqp_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 qpn; + u8 rsvd[3]; + u8 type; +}; + +struct mlx5_conf_sqp_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_xrcd_mbox_in { + struct mlx5_inbox_hdr hdr; + u8 rsvd[8]; +}; + +struct mlx5_alloc_xrcd_mbox_out { + struct mlx5_outbox_hdr hdr; + __be32 xrcdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_xrcd_mbox_in { + struct mlx5_inbox_hdr hdr; + __be32 xrcdn; + u8 rsvd[4]; +}; + +struct mlx5_dealloc_xrcd_mbox_out { + struct mlx5_outbox_hdr hdr; + u8 rsvd[8]; +}; + +static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) +{ + return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); +} + +int mlx5_core_create_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp, + struct mlx5_create_qp_mbox_in *in, + int inlen); +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state, + enum mlx5_qp_state new_state, + struct mlx5_modify_qp_mbox_in *in, int sqd_event, + struct mlx5_core_qp *qp); +int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, + struct mlx5_core_qp *qp); +int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, + struct mlx5_query_qp_mbox_out *out, int outlen); + +int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); +int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); +void mlx5_init_qp_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); +int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); +void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); + +#endif /* MLX5_QP_H */ diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h new file mode 100644 index 000000000000..e1a363a33663 --- /dev/null +++ b/include/linux/mlx5/srq.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_SRQ_H +#define MLX5_SRQ_H + +#include + +void mlx5_init_srq_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev); + +#endif /* MLX5_SRQ_H */ -- cgit v1.2.3 From 63884c90ffa3f73a81b81f169c51c34d2b9cf75e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 1 Jul 2013 14:15:17 -0700 Subject: mlx5: Fix parameter type of health_handler_t This deals with the sparse warning: drivers/net/ethernet/mellanox/mlx5/core/health.c:94:54: warning: incorrect type in argument 2 (different address spaces) drivers/net/ethernet/mellanox/mlx5/core/health.c:94:54: expected void *buf drivers/net/ethernet/mellanox/mlx5/core/health.c:94:54: got struct health_buffer [noderef] *health Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e47f1e4c9b03..f22e4419839b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -729,7 +729,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); -typedef void (*health_handler_t)(struct pci_dev *pdev, void *buf, int size); +typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size); int mlx5_register_health_report_handler(health_handler_t handler); void mlx5_unregister_health_report_handler(void); const char *mlx5_command_str(int command); -- cgit v1.2.3 From 8b80cda536ea9bceec0364e897868a30ee13b992 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Wed, 10 Jul 2013 17:13:26 +0300 Subject: net: rename ll methods to busy-poll Rename ndo_ll_poll to ndo_busy_poll. Rename sk_mark_ll to sk_mark_napi_id. Rename skb_mark_ll to skb_mark_napi_id. Correct all useres of these functions. Update comments and defines in include/net/busy_poll.h Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bb82871b8494..0741a1e919a5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -974,7 +974,7 @@ struct net_device_ops { void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif #ifdef CONFIG_NET_LL_RX_POLL - int (*ndo_ll_poll)(struct napi_struct *dev); + int (*ndo_busy_poll)(struct napi_struct *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); -- cgit v1.2.3 From 288dde9f23b6726c1e8147bf635721372bf77b16 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Wed, 10 Jul 2013 14:31:03 +0300 Subject: mlx5_core: Adjust hca_cap.uar_page_sz to conform to Connect-IB spec Sparse reported an endianness bug in the assignment to hca_cap.uar_page_sz. Fix the declaration of this field to be __be16 (which is what is in the firmware spec), renaming the field to log_uar_pg_size to conform to the spec, which fixes the endianness bug reported by sparse. Reported-by: Fengguang Wu Signed-off-by: Moshe Lazer Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 51390915e538..8de8d8f22384 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -317,8 +317,8 @@ struct mlx5_hca_cap { u8 log_max_pd; u8 rsvd25; u8 log_max_xrcd; - u8 rsvd26[40]; - __be32 uar_page_sz; + u8 rsvd26[42]; + __be16 log_uar_page_sz; u8 rsvd27[28]; u8 log_msx_atomic_size_qp; u8 rsvd28[2]; -- cgit v1.2.3 From 1b375dc30710180c4b88cc59caba6e3481ec5c8b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 5 Jul 2013 09:29:32 +0200 Subject: mutex: Move ww_mutex definitions to ww_mutex.h Move the definitions for wound/wait mutexes out to a separate header, ww_mutex.h. This reduces clutter in mutex.h, and increases readability. Suggested-by: Linus Torvalds Signed-off-by: Maarten Lankhorst Acked-by: Peter Zijlstra Acked-by: Rik van Riel Acked-by: Maarten Lankhorst Cc: Dave Airlie Link: http://lkml.kernel.org/r/51D675DC.3000907@canonical.com [ Tidied up the code a bit. ] Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 358 ----------------------------------------- include/linux/reservation.h | 2 +- include/linux/ww_mutex.h | 378 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 379 insertions(+), 359 deletions(-) create mode 100644 include/linux/ww_mutex.h (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3793ed7feeeb..ccd4260834c5 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,40 +78,6 @@ struct mutex_waiter { #endif }; -struct ww_class { - atomic_long_t stamp; - struct lock_class_key acquire_key; - struct lock_class_key mutex_key; - const char *acquire_name; - const char *mutex_name; -}; - -struct ww_acquire_ctx { - struct task_struct *task; - unsigned long stamp; - unsigned acquired; -#ifdef CONFIG_DEBUG_MUTEXES - unsigned done_acquire; - struct ww_class *ww_class; - struct ww_mutex *contending_lock; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH - unsigned deadlock_inject_interval; - unsigned deadlock_inject_countdown; -#endif -}; - -struct ww_mutex { - struct mutex base; - struct ww_acquire_ctx *ctx; -#ifdef CONFIG_DEBUG_MUTEXES - struct ww_class *ww_class; -#endif -}; - #ifdef CONFIG_DEBUG_MUTEXES # include #else @@ -136,11 +102,8 @@ static inline void mutex_destroy(struct mutex *lock) {} #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ , .dep_map = { .name = #lockname } -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \ - , .ww_class = &ww_class #else # define __DEP_MAP_MUTEX_INITIALIZER(lockname) -# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) #endif #define __MUTEX_INITIALIZER(lockname) \ @@ -150,48 +113,12 @@ static inline void mutex_destroy(struct mutex *lock) {} __DEBUG_MUTEX_INITIALIZER(lockname) \ __DEP_MAP_MUTEX_INITIALIZER(lockname) } -#define __WW_CLASS_INITIALIZER(ww_class) \ - { .stamp = ATOMIC_LONG_INIT(0) \ - , .acquire_name = #ww_class "_acquire" \ - , .mutex_name = #ww_class "_mutex" } - -#define __WW_MUTEX_INITIALIZER(lockname, class) \ - { .base = { \__MUTEX_INITIALIZER(lockname) } \ - __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } - #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) -#define DEFINE_WW_CLASS(classname) \ - struct ww_class classname = __WW_CLASS_INITIALIZER(classname) - -#define DEFINE_WW_MUTEX(mutexname, ww_class) \ - struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class) - - extern void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); -/** - * ww_mutex_init - initialize the w/w mutex - * @lock: the mutex to be initialized - * @ww_class: the w/w class the mutex should belong to - * - * Initialize the w/w mutex to unlocked state and associate it with the given - * class. - * - * It is not allowed to initialize an already locked mutex. - */ -static inline void ww_mutex_init(struct ww_mutex *lock, - struct ww_class *ww_class) -{ - __mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); - lock->ctx = NULL; -#ifdef CONFIG_DEBUG_MUTEXES - lock->ww_class = ww_class; -#endif -} - /** * mutex_is_locked - is the mutex locked * @lock: the mutex to be queried @@ -246,291 +173,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); -/** - * ww_acquire_init - initialize a w/w acquire context - * @ctx: w/w acquire context to initialize - * @ww_class: w/w class of the context - * - * Initializes an context to acquire multiple mutexes of the given w/w class. - * - * Context-based w/w mutex acquiring can be done in any order whatsoever within - * a given lock class. Deadlocks will be detected and handled with the - * wait/wound logic. - * - * Mixing of context-based w/w mutex acquiring and single w/w mutex locking can - * result in undetected deadlocks and is so forbidden. Mixing different contexts - * for the same w/w class when acquiring mutexes can also result in undetected - * deadlocks, and is hence also forbidden. Both types of abuse will be caught by - * enabling CONFIG_PROVE_LOCKING. - * - * Nesting of acquire contexts for _different_ w/w classes is possible, subject - * to the usual locking rules between different lock classes. - * - * An acquire context must be released with ww_acquire_fini by the same task - * before the memory is freed. It is recommended to allocate the context itself - * on the stack. - */ -static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, - struct ww_class *ww_class) -{ - ctx->task = current; - ctx->stamp = atomic_long_inc_return(&ww_class->stamp); - ctx->acquired = 0; -#ifdef CONFIG_DEBUG_MUTEXES - ctx->ww_class = ww_class; - ctx->done_acquire = 0; - ctx->contending_lock = NULL; -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC - debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); - lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, - &ww_class->acquire_key, 0); - mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); -#endif -#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH - ctx->deadlock_inject_interval = 1; - ctx->deadlock_inject_countdown = ctx->stamp & 0xf; -#endif -} - -/** - * ww_acquire_done - marks the end of the acquire phase - * @ctx: the acquire context - * - * Marks the end of the acquire phase, any further w/w mutex lock calls using - * this context are forbidden. - * - * Calling this function is optional, it is just useful to document w/w mutex - * code and clearly designated the acquire phase from actually using the locked - * data structures. - */ -static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) -{ -#ifdef CONFIG_DEBUG_MUTEXES - lockdep_assert_held(ctx); - - DEBUG_LOCKS_WARN_ON(ctx->done_acquire); - ctx->done_acquire = 1; -#endif -} - -/** - * ww_acquire_fini - releases a w/w acquire context - * @ctx: the acquire context to free - * - * Releases a w/w acquire context. This must be called _after_ all acquired w/w - * mutexes have been released with ww_mutex_unlock. - */ -static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) -{ -#ifdef CONFIG_DEBUG_MUTEXES - mutex_release(&ctx->dep_map, 0, _THIS_IP_); - - DEBUG_LOCKS_WARN_ON(ctx->acquired); - if (!config_enabled(CONFIG_PROVE_LOCKING)) - /* - * lockdep will normally handle this, - * but fail without anyway - */ - ctx->done_acquire = 1; - - if (!config_enabled(CONFIG_DEBUG_LOCK_ALLOC)) - /* ensure ww_acquire_fini will still fail if called twice */ - ctx->acquired = ~0U; -#endif -} - -extern int __must_check __ww_mutex_lock(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); -extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx); - -/** - * ww_mutex_lock - acquire the w/w mutex - * @lock: the mutex to be acquired - * @ctx: w/w acquire context, or NULL to acquire only a single lock. - * - * Lock the w/w mutex exclusively for this task. - * - * Deadlocks within a given w/w class of locks are detected and handled with the - * wait/wound algorithm. If the lock isn't immediately avaiable this function - * will either sleep until it is (wait case). Or it selects the current context - * for backing off by returning -EDEADLK (wound case). Trying to acquire the - * same lock with the same context twice is also detected and signalled by - * returning -EALREADY. Returns 0 if the mutex was successfully acquired. - * - * In the wound case the caller must release all currently held w/w mutexes for - * the given context and then wait for this contending lock to be available by - * calling ww_mutex_lock_slow. Alternatively callers can opt to not acquire this - * lock and proceed with trying to acquire further w/w mutexes (e.g. when - * scanning through lru lists trying to free resources). - * - * The mutex must later on be released by the same task that - * acquired it. The task may not exit without first unlocking the mutex. Also, - * kernel memory where the mutex resides must not be freed with the mutex still - * locked. The mutex must first be initialized (or statically defined) before it - * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be - * of the same w/w lock class as was used to initialize the acquire context. - * - * A mutex acquired with this function must be released with ww_mutex_unlock. - */ -static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - if (ctx) - return __ww_mutex_lock(lock, ctx); - else { - mutex_lock(&lock->base); - return 0; - } -} - -/** - * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible - * @lock: the mutex to be acquired - * @ctx: w/w acquire context - * - * Lock the w/w mutex exclusively for this task. - * - * Deadlocks within a given w/w class of locks are detected and handled with the - * wait/wound algorithm. If the lock isn't immediately avaiable this function - * will either sleep until it is (wait case). Or it selects the current context - * for backing off by returning -EDEADLK (wound case). Trying to acquire the - * same lock with the same context twice is also detected and signalled by - * returning -EALREADY. Returns 0 if the mutex was successfully acquired. If a - * signal arrives while waiting for the lock then this function returns -EINTR. - * - * In the wound case the caller must release all currently held w/w mutexes for - * the given context and then wait for this contending lock to be available by - * calling ww_mutex_lock_slow_interruptible. Alternatively callers can opt to - * not acquire this lock and proceed with trying to acquire further w/w mutexes - * (e.g. when scanning through lru lists trying to free resources). - * - * The mutex must later on be released by the same task that - * acquired it. The task may not exit without first unlocking the mutex. Also, - * kernel memory where the mutex resides must not be freed with the mutex still - * locked. The mutex must first be initialized (or statically defined) before it - * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be - * of the same w/w lock class as was used to initialize the acquire context. - * - * A mutex acquired with this function must be released with ww_mutex_unlock. - */ -static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - if (ctx) - return __ww_mutex_lock_interruptible(lock, ctx); - else - return mutex_lock_interruptible(&lock->base); -} - -/** - * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex - * @lock: the mutex to be acquired - * @ctx: w/w acquire context - * - * Acquires a w/w mutex with the given context after a wound case. This function - * will sleep until the lock becomes available. - * - * The caller must have released all w/w mutexes already acquired with the - * context and then call this function on the contended lock. - * - * Afterwards the caller may continue to (re)acquire the other w/w mutexes it - * needs with ww_mutex_lock. Note that the -EALREADY return code from - * ww_mutex_lock can be used to avoid locking this contended mutex twice. - * - * It is forbidden to call this function with any other w/w mutexes associated - * with the context held. It is forbidden to call this on anything else than the - * contending mutex. - * - * Note that the slowpath lock acquiring can also be done by calling - * ww_mutex_lock directly. This function here is simply to help w/w mutex - * locking code readability by clearly denoting the slowpath. - */ -static inline void -ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) -{ - int ret; -#ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); -#endif - ret = ww_mutex_lock(lock, ctx); - (void)ret; -} - -/** - * ww_mutex_lock_slow_interruptible - slowpath acquiring of the w/w mutex, - * interruptible - * @lock: the mutex to be acquired - * @ctx: w/w acquire context - * - * Acquires a w/w mutex with the given context after a wound case. This function - * will sleep until the lock becomes available and returns 0 when the lock has - * been acquired. If a signal arrives while waiting for the lock then this - * function returns -EINTR. - * - * The caller must have released all w/w mutexes already acquired with the - * context and then call this function on the contended lock. - * - * Afterwards the caller may continue to (re)acquire the other w/w mutexes it - * needs with ww_mutex_lock. Note that the -EALREADY return code from - * ww_mutex_lock can be used to avoid locking this contended mutex twice. - * - * It is forbidden to call this function with any other w/w mutexes associated - * with the given context held. It is forbidden to call this on anything else - * than the contending mutex. - * - * Note that the slowpath lock acquiring can also be done by calling - * ww_mutex_lock_interruptible directly. This function here is simply to help - * w/w mutex locking code readability by clearly denoting the slowpath. - */ -static inline int __must_check -ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ -#ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); -#endif - return ww_mutex_lock_interruptible(lock, ctx); -} - -extern void ww_mutex_unlock(struct ww_mutex *lock); - -/** - * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context - * @lock: mutex to lock - * - * Trylocks a mutex without acquire context, so no deadlock detection is - * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. - */ -static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) -{ - return mutex_trylock(&lock->base); -} - -/*** - * ww_mutex_destroy - mark a w/w mutex unusable - * @lock: the mutex to be destroyed - * - * This function marks the mutex uninitialized, and any subsequent - * use of the mutex is forbidden. The mutex must not be locked when - * this function is called. - */ -static inline void ww_mutex_destroy(struct ww_mutex *lock) -{ - mutex_destroy(&lock->base); -} - -/** - * ww_mutex_is_locked - is the w/w mutex locked - * @lock: the mutex to be queried - * - * Returns 1 if the mutex is locked, 0 if unlocked. - */ -static inline bool ww_mutex_is_locked(struct ww_mutex *lock) -{ - return mutex_is_locked(&lock->base); -} - extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX diff --git a/include/linux/reservation.h b/include/linux/reservation.h index e9ee806a9d72..813dae960ebd 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -39,7 +39,7 @@ #ifndef _LINUX_RESERVATION_H #define _LINUX_RESERVATION_H -#include +#include extern struct ww_class reservation_ww_class; diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h new file mode 100644 index 000000000000..760399a470bd --- /dev/null +++ b/include/linux/ww_mutex.h @@ -0,0 +1,378 @@ +/* + * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance + * + * Original mutex implementation started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * Wound/wait implementation: + * Copyright (C) 2013 Canonical Ltd. + * + * This file contains the main data structure and API definitions. + */ + +#ifndef __LINUX_WW_MUTEX_H +#define __LINUX_WW_MUTEX_H + +#include + +struct ww_class { + atomic_long_t stamp; + struct lock_class_key acquire_key; + struct lock_class_key mutex_key; + const char *acquire_name; + const char *mutex_name; +}; + +struct ww_acquire_ctx { + struct task_struct *task; + unsigned long stamp; + unsigned acquired; +#ifdef CONFIG_DEBUG_MUTEXES + unsigned done_acquire; + struct ww_class *ww_class; + struct ww_mutex *contending_lock; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + unsigned deadlock_inject_interval; + unsigned deadlock_inject_countdown; +#endif +}; + +struct ww_mutex { + struct mutex base; + struct ww_acquire_ctx *ctx; +#ifdef CONFIG_DEBUG_MUTEXES + struct ww_class *ww_class; +#endif +}; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \ + , .ww_class = &ww_class +#else +# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) +#endif + +#define __WW_CLASS_INITIALIZER(ww_class) \ + { .stamp = ATOMIC_LONG_INIT(0) \ + , .acquire_name = #ww_class "_acquire" \ + , .mutex_name = #ww_class "_mutex" } + +#define __WW_MUTEX_INITIALIZER(lockname, class) \ + { .base = { \__MUTEX_INITIALIZER(lockname) } \ + __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } + +#define DEFINE_WW_CLASS(classname) \ + struct ww_class classname = __WW_CLASS_INITIALIZER(classname) + +#define DEFINE_WW_MUTEX(mutexname, ww_class) \ + struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class) + +/** + * ww_mutex_init - initialize the w/w mutex + * @lock: the mutex to be initialized + * @ww_class: the w/w class the mutex should belong to + * + * Initialize the w/w mutex to unlocked state and associate it with the given + * class. + * + * It is not allowed to initialize an already locked mutex. + */ +static inline void ww_mutex_init(struct ww_mutex *lock, + struct ww_class *ww_class) +{ + __mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); + lock->ctx = NULL; +#ifdef CONFIG_DEBUG_MUTEXES + lock->ww_class = ww_class; +#endif +} + +/** + * ww_acquire_init - initialize a w/w acquire context + * @ctx: w/w acquire context to initialize + * @ww_class: w/w class of the context + * + * Initializes an context to acquire multiple mutexes of the given w/w class. + * + * Context-based w/w mutex acquiring can be done in any order whatsoever within + * a given lock class. Deadlocks will be detected and handled with the + * wait/wound logic. + * + * Mixing of context-based w/w mutex acquiring and single w/w mutex locking can + * result in undetected deadlocks and is so forbidden. Mixing different contexts + * for the same w/w class when acquiring mutexes can also result in undetected + * deadlocks, and is hence also forbidden. Both types of abuse will be caught by + * enabling CONFIG_PROVE_LOCKING. + * + * Nesting of acquire contexts for _different_ w/w classes is possible, subject + * to the usual locking rules between different lock classes. + * + * An acquire context must be released with ww_acquire_fini by the same task + * before the memory is freed. It is recommended to allocate the context itself + * on the stack. + */ +static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, + struct ww_class *ww_class) +{ + ctx->task = current; + ctx->stamp = atomic_long_inc_return(&ww_class->stamp); + ctx->acquired = 0; +#ifdef CONFIG_DEBUG_MUTEXES + ctx->ww_class = ww_class; + ctx->done_acquire = 0; + ctx->contending_lock = NULL; +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); + lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, + &ww_class->acquire_key, 0); + mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); +#endif +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH + ctx->deadlock_inject_interval = 1; + ctx->deadlock_inject_countdown = ctx->stamp & 0xf; +#endif +} + +/** + * ww_acquire_done - marks the end of the acquire phase + * @ctx: the acquire context + * + * Marks the end of the acquire phase, any further w/w mutex lock calls using + * this context are forbidden. + * + * Calling this function is optional, it is just useful to document w/w mutex + * code and clearly designated the acquire phase from actually using the locked + * data structures. + */ +static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + lockdep_assert_held(ctx); + + DEBUG_LOCKS_WARN_ON(ctx->done_acquire); + ctx->done_acquire = 1; +#endif +} + +/** + * ww_acquire_fini - releases a w/w acquire context + * @ctx: the acquire context to free + * + * Releases a w/w acquire context. This must be called _after_ all acquired w/w + * mutexes have been released with ww_mutex_unlock. + */ +static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + mutex_release(&ctx->dep_map, 0, _THIS_IP_); + + DEBUG_LOCKS_WARN_ON(ctx->acquired); + if (!config_enabled(CONFIG_PROVE_LOCKING)) + /* + * lockdep will normally handle this, + * but fail without anyway + */ + ctx->done_acquire = 1; + + if (!config_enabled(CONFIG_DEBUG_LOCK_ALLOC)) + /* ensure ww_acquire_fini will still fail if called twice */ + ctx->acquired = ~0U; +#endif +} + +extern int __must_check __ww_mutex_lock(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); +extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx); + +/** + * ww_mutex_lock - acquire the w/w mutex + * @lock: the mutex to be acquired + * @ctx: w/w acquire context, or NULL to acquire only a single lock. + * + * Lock the w/w mutex exclusively for this task. + * + * Deadlocks within a given w/w class of locks are detected and handled with the + * wait/wound algorithm. If the lock isn't immediately avaiable this function + * will either sleep until it is (wait case). Or it selects the current context + * for backing off by returning -EDEADLK (wound case). Trying to acquire the + * same lock with the same context twice is also detected and signalled by + * returning -EALREADY. Returns 0 if the mutex was successfully acquired. + * + * In the wound case the caller must release all currently held w/w mutexes for + * the given context and then wait for this contending lock to be available by + * calling ww_mutex_lock_slow. Alternatively callers can opt to not acquire this + * lock and proceed with trying to acquire further w/w mutexes (e.g. when + * scanning through lru lists trying to free resources). + * + * The mutex must later on be released by the same task that + * acquired it. The task may not exit without first unlocking the mutex. Also, + * kernel memory where the mutex resides must not be freed with the mutex still + * locked. The mutex must first be initialized (or statically defined) before it + * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be + * of the same w/w lock class as was used to initialize the acquire context. + * + * A mutex acquired with this function must be released with ww_mutex_unlock. + */ +static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + if (ctx) + return __ww_mutex_lock(lock, ctx); + + mutex_lock(&lock->base); + return 0; +} + +/** + * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible + * @lock: the mutex to be acquired + * @ctx: w/w acquire context + * + * Lock the w/w mutex exclusively for this task. + * + * Deadlocks within a given w/w class of locks are detected and handled with the + * wait/wound algorithm. If the lock isn't immediately avaiable this function + * will either sleep until it is (wait case). Or it selects the current context + * for backing off by returning -EDEADLK (wound case). Trying to acquire the + * same lock with the same context twice is also detected and signalled by + * returning -EALREADY. Returns 0 if the mutex was successfully acquired. If a + * signal arrives while waiting for the lock then this function returns -EINTR. + * + * In the wound case the caller must release all currently held w/w mutexes for + * the given context and then wait for this contending lock to be available by + * calling ww_mutex_lock_slow_interruptible. Alternatively callers can opt to + * not acquire this lock and proceed with trying to acquire further w/w mutexes + * (e.g. when scanning through lru lists trying to free resources). + * + * The mutex must later on be released by the same task that + * acquired it. The task may not exit without first unlocking the mutex. Also, + * kernel memory where the mutex resides must not be freed with the mutex still + * locked. The mutex must first be initialized (or statically defined) before it + * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be + * of the same w/w lock class as was used to initialize the acquire context. + * + * A mutex acquired with this function must be released with ww_mutex_unlock. + */ +static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + if (ctx) + return __ww_mutex_lock_interruptible(lock, ctx); + else + return mutex_lock_interruptible(&lock->base); +} + +/** + * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex + * @lock: the mutex to be acquired + * @ctx: w/w acquire context + * + * Acquires a w/w mutex with the given context after a wound case. This function + * will sleep until the lock becomes available. + * + * The caller must have released all w/w mutexes already acquired with the + * context and then call this function on the contended lock. + * + * Afterwards the caller may continue to (re)acquire the other w/w mutexes it + * needs with ww_mutex_lock. Note that the -EALREADY return code from + * ww_mutex_lock can be used to avoid locking this contended mutex twice. + * + * It is forbidden to call this function with any other w/w mutexes associated + * with the context held. It is forbidden to call this on anything else than the + * contending mutex. + * + * Note that the slowpath lock acquiring can also be done by calling + * ww_mutex_lock directly. This function here is simply to help w/w mutex + * locking code readability by clearly denoting the slowpath. + */ +static inline void +ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ + int ret; +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); +#endif + ret = ww_mutex_lock(lock, ctx); + (void)ret; +} + +/** + * ww_mutex_lock_slow_interruptible - slowpath acquiring of the w/w mutex, interruptible + * @lock: the mutex to be acquired + * @ctx: w/w acquire context + * + * Acquires a w/w mutex with the given context after a wound case. This function + * will sleep until the lock becomes available and returns 0 when the lock has + * been acquired. If a signal arrives while waiting for the lock then this + * function returns -EINTR. + * + * The caller must have released all w/w mutexes already acquired with the + * context and then call this function on the contended lock. + * + * Afterwards the caller may continue to (re)acquire the other w/w mutexes it + * needs with ww_mutex_lock. Note that the -EALREADY return code from + * ww_mutex_lock can be used to avoid locking this contended mutex twice. + * + * It is forbidden to call this function with any other w/w mutexes associated + * with the given context held. It is forbidden to call this on anything else + * than the contending mutex. + * + * Note that the slowpath lock acquiring can also be done by calling + * ww_mutex_lock_interruptible directly. This function here is simply to help + * w/w mutex locking code readability by clearly denoting the slowpath. + */ +static inline int __must_check +ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); +#endif + return ww_mutex_lock_interruptible(lock, ctx); +} + +extern void ww_mutex_unlock(struct ww_mutex *lock); + +/** + * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context + * @lock: mutex to lock + * + * Trylocks a mutex without acquire context, so no deadlock detection is + * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise. + */ +static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock) +{ + return mutex_trylock(&lock->base); +} + +/*** + * ww_mutex_destroy - mark a w/w mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +static inline void ww_mutex_destroy(struct ww_mutex *lock) +{ + mutex_destroy(&lock->base); +} + +/** + * ww_mutex_is_locked - is the w/w mutex locked + * @lock: the mutex to be queried + * + * Returns 1 if the mutex is locked, 0 if unlocked. + */ +static inline bool ww_mutex_is_locked(struct ww_mutex *lock) +{ + return mutex_is_locked(&lock->base); +} + +#endif -- cgit v1.2.3 From 4f5e65a1cc90bbb15b9f6cdc362922af1bcc155a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Jul 2013 14:24:16 -0700 Subject: fput: turn "list_head delayed_fput_list" into llist_head fput() and delayed_fput() can use llist and avoid the locking. This is unlikely path, it is not that this change can improve the performance, but this way the code looks simpler. Signed-off-by: Oleg Nesterov Suggested-by: Andrew Morton Cc: Al Viro Cc: Andrey Vagin Cc: "Eric W. Biederman" Cc: David Howells Cc: Huang Ying Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 834c9e5113d9..d40e8e78bbd1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -768,6 +769,7 @@ struct file { */ union { struct list_head fu_list; + struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; -- cgit v1.2.3 From fb4214db50b00558cc6e274c88b3f7325068e942 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Jul 2013 14:24:18 -0700 Subject: llist: fix/simplify llist_add() and llist_add_batch() 1. This is mostly theoretical, but llist_add*() need ACCESS_ONCE(). Otherwise it is not guaranteed that the first cmpxchg() uses the same value for old_entry and new_last->next. 2. These helpers cache the result of cmpxchg() and read the initial value of head->first before the main loop. I do not think this makes sense. In the likely case cmpxchg() succeeds, otherwise it doesn't hurt to reload head->first. I think it would be better to simplify the code and simply read ->first before cmpxchg(). Signed-off-by: Oleg Nesterov Cc: Al Viro Cc: Andrey Vagin Cc: "Eric W. Biederman" Cc: David Howells Cc: Huang Ying Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/llist.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index a5199f6d0e82..3e2b969d68f6 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -151,18 +151,13 @@ static inline struct llist_node *llist_next(struct llist_node *node) */ static inline bool llist_add(struct llist_node *new, struct llist_head *head) { - struct llist_node *entry, *old_entry; - - entry = head->first; - for (;;) { - old_entry = entry; - new->next = entry; - entry = cmpxchg(&head->first, old_entry, new); - if (entry == old_entry) - break; - } - - return old_entry == NULL; + struct llist_node *first; + + do { + new->next = first = ACCESS_ONCE(head->first); + } while (cmpxchg(&head->first, first, new) != first); + + return !first; } /** -- cgit v1.2.3 From e9a17bd73a29e5323c37ec5ffe50fc0e825d3d03 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Jul 2013 14:24:19 -0700 Subject: llist: llist_add() can use llist_add_batch() llist_add(new, head) can simply use llist_add_batch(new, new, head), no need to duplicate the code. This obviously uninlines llist_add() and to me this is a win. But we can make llist_add_batch() inline if this is desirable, in this case gcc can notice that new_first == new_last if the caller is llist_add(). Signed-off-by: Oleg Nesterov Cc: Al Viro Cc: Andrey Vagin Cc: "Eric W. Biederman" Cc: David Howells Cc: Huang Ying Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Al Viro --- include/linux/llist.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/llist.h b/include/linux/llist.h index 3e2b969d68f6..cdaa7f023899 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -142,6 +142,9 @@ static inline struct llist_node *llist_next(struct llist_node *node) return node->next; } +extern bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head); /** * llist_add - add a new entry * @new: new entry to be added @@ -151,13 +154,7 @@ static inline struct llist_node *llist_next(struct llist_node *node) */ static inline bool llist_add(struct llist_node *new, struct llist_head *head) { - struct llist_node *first; - - do { - new->next = first = ACCESS_ONCE(head->first); - } while (cmpxchg(&head->first, first, new) != first); - - return !first; + return llist_add_batch(new, new, head); } /** @@ -173,9 +170,6 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) return xchg(&head->first, NULL); } -extern bool llist_add_batch(struct llist_node *new_first, - struct llist_node *new_last, - struct llist_head *head); extern struct llist_node *llist_del_first(struct llist_head *head); #endif /* LLIST_H */ -- cgit v1.2.3 From a95e691f9c4a6e24fdeab6d7feae6d5411fe8a69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 14 Jul 2013 16:43:54 +0400 Subject: rpc_create_*_dir: don't bother with qstr just pass the name Signed-off-by: Al Viro --- include/linux/sunrpc/rpc_pipe_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a7b422b33eda..aa5b582cc471 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -73,12 +73,12 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); +extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct dentry *); struct cache_detail; extern struct dentry *rpc_create_cache_dir(struct dentry *, - struct qstr *, + const char *, umode_t umode, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); -- cgit v1.2.3 From 1258ca805f613025ec079d959d4a78acfb1f79d3 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 11 Jul 2013 13:55:58 +0900 Subject: PM / Sleep: Fix comment typo in pm_wakeup.h Fix a comment typo (sorce -> source) in pm_wakeup.h. [rjw: Changelog] Signed-off-by: Chanwoo Choi Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 569781faa504..a0f70808d7f4 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -36,8 +36,8 @@ * @last_time: Monotonic clock when the wakeup source's was touched last time. * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. - * @active_count: Number of times the wakeup sorce was activated. - * @relax_count: Number of times the wakeup sorce was deactivated. + * @active_count: Number of times the wakeup source was activated. + * @relax_count: Number of times the wakeup source was deactivated. * @expire_count: Number of times the wakeup source's timeout has expired. * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. -- cgit v1.2.3 From 0db0628d90125193280eabb501c94feaf48fa9ab Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 19 Jun 2013 14:53:51 -0400 Subject: kernel: delete __cpuinit usage from all core kernel files The __cpuinit type of throwaway sections might have made sense some time ago when RAM was more constrained, but now the savings do not offset the cost and complications. For example, the fix in commit 5e427ec2d0 ("x86: Fix bit corruption at CPU resume time") is a good example of the nasty type of bugs that can be created with improper use of the various __init prefixes. After a discussion on LKML[1] it was decided that cpuinit should go the way of devinit and be phased out. Once all the users are gone, we can then finally remove the macros themselves from linux/init.h. This removes all the uses of the __cpuinit macros from C files in the core kernel directories (kernel, init, lib, mm, and include) that don't really have a specific maintainer. [1] https://lkml.org/lkml/2013/5/20/589 Signed-off-by: Paul Gortmaker --- include/linux/cpu.h | 2 +- include/linux/perf_event.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 944f283f01c4..ab0eade73039 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -114,7 +114,7 @@ enum { /* Need to know about CPUs going up/down? */ #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) #define cpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb __cpuinitdata = \ + static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8873f82c7baa..c43f6eabad5b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -826,7 +826,7 @@ static inline void perf_restore_debug_store(void) { } */ #define perf_cpu_notifier(fn) \ do { \ - static struct notifier_block fn##_nb __cpuinitdata = \ + static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ unsigned long cpu = smp_processor_id(); \ unsigned long flags; \ -- cgit v1.2.3 From b9b3259746d77f4fcb786e2a43c25bcc40773755 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:51 -0700 Subject: sysfs.h: add __ATTR_RW() macro A number of parts of the kernel created their own version of this, might as well have the sysfs core provide it instead. Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e2cee22f578a..9cd20c8404e5 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -79,6 +79,8 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store) + #define __ATTR_NULL { .attr = { .name = NULL } } #ifdef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.3 From f2f37f58b1b933b06d6d84e80a31a1b500fb0db2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:52 -0700 Subject: sysfs.h: add ATTRIBUTE_GROUPS() macro To make it easier for driver subsystems to work with attribute groups, create the ATTRIBUTE_GROUPS macro to remove some of the repetitive typing for the most common use for attribute groups. Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9cd20c8404e5..f62ff01e5f59 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -94,6 +94,15 @@ struct attribute_group { #define __ATTR_IGNORE_LOCKDEP __ATTR #endif +#define ATTRIBUTE_GROUPS(name) \ +static const struct attribute_group name##_group = { \ + .attrs = name##_attrs, \ +}; \ +static const struct attribute_group *name##_groups[] = { \ + &name##_group, \ + NULL, \ +} + #define attr_name(_attr) (_attr).attr.name struct file; -- cgit v1.2.3 From e4b63603c2a1e2c4db3de11b0f2b17360a7695bb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:53 -0700 Subject: sysfs.h: add BIN_ATTR macro This makes it easier to create static binary attributes, which is needed in a number of drivers, instead of "open coding" them. Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f62ff01e5f59..d50a96b9bb6d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -132,6 +132,15 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) +/* macro to create static binary attributes easier */ +#define BIN_ATTR(_name, _mode, _read, _write, _size) \ +struct bin_attribute bin_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .read = _read, \ + .write = _write, \ + .size = _size, \ +} + struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *,char *); ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); -- cgit v1.2.3 From ced321bf9151535f85779b0004c93529f860b2a4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:54 -0700 Subject: driver core: device.h: add RW and RO attribute macros Make it easier to create attributes without having to always audit the mode settings. Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index bcf8c0d4cd98..f207a8f49f80 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,7 +47,11 @@ struct bus_attribute { }; #define BUS_ATTR(_name, _mode, _show, _store) \ -struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store) + struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define BUS_ATTR_RW(_name) \ + struct bus_attribute bus_attr_##_name = __ATTR_RW(_name) +#define BUS_ATTR_RO(_name) \ + struct bus_attribute bus_attr_##_name = __ATTR_RO(_name) extern int __must_check bus_create_file(struct bus_type *, struct bus_attribute *); @@ -261,9 +265,12 @@ struct driver_attribute { size_t count); }; -#define DRIVER_ATTR(_name, _mode, _show, _store) \ -struct driver_attribute driver_attr_##_name = \ - __ATTR(_name, _mode, _show, _store) +#define DRIVER_ATTR(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define DRIVER_ATTR_RW(_name) \ + struct driver_attribute driver_attr_##_name = __ATTR_RW(_name) +#define DRIVER_ATTR_RO(_name) \ + struct driver_attribute driver_attr_##_name = __ATTR_RO(_name) extern int __must_check driver_create_file(struct device_driver *driver, const struct driver_attribute *attr); @@ -414,8 +421,12 @@ struct class_attribute { const struct class_attribute *attr); }; -#define CLASS_ATTR(_name, _mode, _show, _store) \ -struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define CLASS_ATTR(_name, _mode, _show, _store) \ + struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define CLASS_ATTR_RW(_name) \ + struct class_attribute class_attr_##_name = __ATTR_RW(_name) +#define CLASS_ATTR_RO(_name) \ + struct class_attribute class_attr_##_name = __ATTR_RO(_name) extern int __must_check class_create_file(struct class *class, const struct class_attribute *attr); @@ -423,7 +434,6 @@ extern void class_remove_file(struct class *class, const struct class_attribute *attr); /* Simple class attribute that is just a static string */ - struct class_attribute_string { struct class_attribute attr; char *str; @@ -512,6 +522,10 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define DEVICE_ATTR_RW(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_RW(_name) +#define DEVICE_ATTR_RO(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_RO(_name) #define DEVICE_ULONG_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } -- cgit v1.2.3 From 6ab9cea16075ea707022753395f340b67f64304c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:55 -0700 Subject: sysfs: add support for binary attributes in groups groups should be able to support binary attributes, just like it supports "normal" attributes. This lets us only handle one type of structure, groups, throughout the driver core and subsystems, making binary attributes a "full fledged" part of the driver model, and not something just "tacked on". Reported-by: Oliver Schinagl Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d50a96b9bb6d..2c3b6a30697d 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -21,6 +21,7 @@ struct kobject; struct module; +struct bin_attribute; enum kobj_ns_type; struct attribute { @@ -59,10 +60,9 @@ struct attribute_group { umode_t (*is_visible)(struct kobject *, struct attribute *, int); struct attribute **attrs; + struct bin_attribute **bin_attrs; }; - - /** * Use these macros to make defining attributes easier. See include/linux/device.h * for examples.. -- cgit v1.2.3 From 39ef311204941ddd01ea2950d6220c8ccc710d15 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 14 Jul 2013 16:05:57 -0700 Subject: driver core: Introduce device_create_groups device_create_groups lets callers create devices as well as associated sysfs attributes with a single call. This avoids race conditions seen if sysfs attributes on new devices are created later. [fixed up comment block placement and add checks for printk buffer formats - gregkh] Signed-off-by: Guenter Roeck Cc: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index f207a8f49f80..bd5931e89f74 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -938,6 +938,11 @@ extern __printf(5, 6) struct device *device_create(struct class *cls, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...); +extern __printf(6, 7) +struct device *device_create_with_groups(struct class *cls, + struct device *parent, dev_t devt, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...); extern void device_destroy(struct class *cls, dev_t devt); /* -- cgit v1.2.3 From d05a6f96c76062b5f25858ac02cf677602076f7e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:58 -0700 Subject: driver core: add default groups to struct class We should be using groups, not attribute lists, for classes to allow subdirectories, and soon, binary files. Groups are just more flexible overall, so add them. The dev_attrs list will go away after all in-kernel users are converted to use dev_groups. Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index bd5931e89f74..22b546a58591 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -320,6 +320,7 @@ int subsys_virtual_register(struct bus_type *subsys, * @name: Name of the class. * @owner: The module owner. * @class_attrs: Default attributes of this class. + * @dev_groups: Default attributes of the devices that belong to the class. * @dev_attrs: Default attributes of the devices belong to the class. * @dev_bin_attrs: Default binary attributes of the devices belong to the class. * @dev_kobj: The kobject that represents this class and links it into the hierarchy. @@ -349,7 +350,8 @@ struct class { struct module *owner; struct class_attribute *class_attrs; - struct device_attribute *dev_attrs; + struct device_attribute *dev_attrs; /* use dev_groups instead */ + const struct attribute_group **dev_groups; struct bin_attribute *dev_bin_attrs; struct kobject *dev_kobj; -- cgit v1.2.3 From 3493f69f4c4e8703961919a9a56c2d2e6a25b46f Mon Sep 17 00:00:00 2001 From: Oliver Schinagl Date: Sun, 14 Jul 2013 16:05:59 -0700 Subject: sysfs: add more helper macro's for (bin_)attribute(_groups) With the recent changes to sysfs there's various helper macro's. However there's no RW, RO BIN_ helper macro's. This patch adds them. Signed-off-by: Oliver Schinagl Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 51 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2c3b6a30697d..d907a7328025 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -17,6 +17,7 @@ #include #include #include +#include #include struct kobject; @@ -94,15 +95,18 @@ struct attribute_group { #define __ATTR_IGNORE_LOCKDEP __ATTR #endif -#define ATTRIBUTE_GROUPS(name) \ -static const struct attribute_group name##_group = { \ - .attrs = name##_attrs, \ -}; \ -static const struct attribute_group *name##_groups[] = { \ - &name##_group, \ +#define __ATTRIBUTE_GROUPS(_name) \ +static const struct attribute_group *_name##_groups[] = { \ + &_name##_group, \ NULL, \ } +#define ATTRIBUTE_GROUPS(_name) \ +static const struct attribute_group _name##_group = { \ + .attrs = _name##_attrs, \ +}; \ +__ATTRIBUTE_GROUPS(_name) + #define attr_name(_attr) (_attr).attr.name struct file; @@ -132,15 +136,36 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) -/* macro to create static binary attributes easier */ -#define BIN_ATTR(_name, _mode, _read, _write, _size) \ -struct bin_attribute bin_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .read = _read, \ - .write = _write, \ - .size = _size, \ +/* macros to create static binary attributes easier */ +#define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .read = _read, \ + .write = _write, \ + .size = _size, \ +} + +#define __BIN_ATTR_RO(_name, _size) { \ + .attr = { .name = __stringify(_name), .mode = S_IRUGO }, \ + .read = _name##_read, \ + .size = _size, \ } +#define __BIN_ATTR_RW(_name, _size) __BIN_ATTR(_name, \ + (S_IWUSR | S_IRUGO), _name##_read, \ + _name##_write) + +#define __BIN_ATTR_NULL __ATTR_NULL + +#define BIN_ATTR(_name, _mode, _read, _write, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR(_name, _mode, _read, \ + _write, _size) + +#define BIN_ATTR_RO(_name, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_RO(_name, _size) + +#define BIN_ATTR_RW(_name, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) + struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *,char *); ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); -- cgit v1.2.3 From aa01aa3ca205ea04f44423a58bae38aec886fb96 Mon Sep 17 00:00:00 2001 From: Oliver Schinagl Date: Sun, 14 Jul 2013 16:06:00 -0700 Subject: sysfs: use file mode defines from stat.h With the last patches stat.h was included to the header, and thus those permission defines should be used. Signed-off-by: Oliver Schinagl Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d907a7328025..9e8a9b555ad6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -69,18 +69,19 @@ struct attribute_group { * for examples.. */ -#define __ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ +#define __ATTR(_name,_mode,_show,_store) { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ } -#define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = _name##_show, \ +#define __ATTR_RO(_name) { \ + .attr = { .name = __stringify(_name), .mode = S_IRUGO }, \ + .show = _name##_show, \ } -#define __ATTR_RW(_name) __ATTR(_name, 0644, _name##_show, _name##_store) +#define __ATTR_RW(_name) __ATTR(_name, (S_IWUSR | S_IRUGO), \ + _name##_show, _name##_store) #define __ATTR_NULL { .attr = { .name = NULL } } -- cgit v1.2.3 From c0d15cc7ee8c0d1970197d9eb1727503bcdd2471 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 16 Jul 2013 22:44:08 -0400 Subject: linked-list: Remove __list_for_each __list_for_each used to be the non prefetch() aware list walking primitive. When we removed the prefetch macros from the list routines, it became redundant. Given it does exactly the same thing as list_for_each now, we might as well remove it and call list_for_each directly. All users of __list_for_each have been converted to list_for_each calls in the current merge window. Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- include/linux/list.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index b83e5657365a..f4d8a2f12a33 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -380,17 +380,6 @@ static inline void list_splice_tail_init(struct list_head *list, #define list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant doesn't differ from list_for_each() any more. - * We don't do prefetching in either case. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - /** * list_for_each_prev - iterate over a list backwards * @pos: the &struct list_head to use as a loop cursor. -- cgit v1.2.3 From 8c5bd7adb2ce47e6aa39d17b2375f69b0c0aa255 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 18 Jul 2013 02:08:06 +0200 Subject: ACPI / video / i915: No ACPI backlight if firmware expects Windows 8 According to Matthew Garrett, "Windows 8 leaves backlight control up to individual graphics drivers rather than making ACPI calls itself. There's plenty of evidence to suggest that the Intel driver for Windows [8] doesn't use the ACPI interface, including the fact that it's broken on a bunch of machines when the OS claims to support Windows 8. The simplest thing to do appears to be to disable the ACPI backlight interface on these systems". There's a problem with that approach, however, because simply avoiding to register the ACPI backlight interface if the firmware calls _OSI for Windows 8 may not work in the following situations: (1) The ACPI backlight interface actually works on the given system and the i915 driver is not loaded (e.g. another graphics driver is used). (2) The ACPI backlight interface doesn't work on the given system, but there is a vendor platform driver that will register its own, equally broken, backlight interface if not prevented from doing so by the ACPI subsystem. Therefore we need to allow the ACPI backlight interface to be registered until the i915 driver is loaded which then will unregister it if the firmware has called _OSI for Windows 8 (or will register the ACPI video driver without backlight support if not already present). For this reason, introduce an alternative function for registering ACPI video, acpi_video_register_with_quirks(), that will check whether or not the ACPI video driver has already been registered and whether or not the backlight Windows 8 quirk has to be applied. If the quirk has to be applied, it will block the ACPI backlight support and either unregister the backlight interface if the ACPI video driver has already been registered, or register the ACPI video driver without the backlight interface otherwise. Make the i915 driver use acpi_video_register_with_quirks() instead of acpi_video_register() in i915_driver_load(). This change is based on earlier patches from Matthew Garrett, Chun-Yi Lee and Seth Forshee and includes a fix from Aaron Lu's. References: https://bugzilla.kernel.org/show_bug.cgi?id=51231 Tested-by: Aaron Lu Tested-by: Igor Gnatenko Tested-by: Yves-Alexis Perez Signed-off-by: Rafael J. Wysocki Reviewed-by: Aaron Lu Acked-by: Matthew Garrett --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 353ba256f368..6ad72f92469c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -191,6 +191,7 @@ extern bool wmi_has_guid(const char *guid); #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO 0x0200 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR 0x0400 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO 0x0800 +#define ACPI_VIDEO_SKIP_BACKLIGHT 0x1000 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE) -- cgit v1.2.3 From d4b812dea4a236f729526facf97df1a9d18e191c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 07:19:26 -0700 Subject: vlan: mask vlan prio bits In commit 48cc32d38a52d0b68f91a171a8d00531edc6a46e ("vlan: don't deliver frames for unknown vlans to protocols") Florian made sure we set pkt_type to PACKET_OTHERHOST if the vlan id is set and we could find a vlan device for this particular id. But we also have a problem if prio bits are set. Steinar reported an issue on a router receiving IPv6 frames with a vlan tag of 4000 (id 0, prio 2), and tunneled into a sit device, because skb->vlan_tci is set. Forwarded frame is completely corrupted : We can see (8100:4000) being inserted in the middle of IPv6 source address : 16:48:00.780413 IP6 2001:16d8:8100:4000:ee1c:0:9d9:bc87 > 9f94:4d95:2001:67c:29f4::: ICMP6, unknown icmp6 type (0), length 64 0x0000: 0000 0029 8000 c7c3 7103 0001 a0ae e651 0x0010: 0000 0000 ccce 0b00 0000 0000 1011 1213 0x0020: 1415 1617 1819 1a1b 1c1d 1e1f 2021 2223 0x0030: 2425 2627 2829 2a2b 2c2d 2e2f 3031 3233 It seems we are not really ready to properly cope with this right now. We can probably do better in future kernels : vlan_get_ingress_priority() should be a netdev property instead of a per vlan_dev one. For stable kernels, lets clear vlan_tci to fix the bugs. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index cdcbafa9b39a..715c343f7c00 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -79,9 +79,8 @@ static inline int is_vlan_dev(struct net_device *dev) } #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define vlan_tx_nonzero_tag_present(__skb) \ - (vlan_tx_tag_present(__skb) && ((__skb)->vlan_tci & VLAN_VID_MASK)) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -- cgit v1.2.3 From 24924a20dab603089011f9d3eb7622f0f6ef93c0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 18 Jul 2013 22:09:08 +0800 Subject: vfs: constify dentry parameter in d_count() so that it can be used in places like d_compare/d_hash without causing a compiler warning. Signed-off-by: Peng Tao Signed-off-by: Al Viro --- include/linux/dcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3092df3614ae..b90337c9d468 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -324,7 +324,7 @@ static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) return ret; } -static inline unsigned d_count(struct dentry *dentry) +static inline unsigned d_count(const struct dentry *dentry) { return dentry->d_count; } -- cgit v1.2.3