From d24cdcd3e40a6825135498e11c20c7976b9bf545 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Jan 2017 12:06:09 +0100 Subject: libceph: use BUG() instead of BUG_ON(1) I ran into this compile warning, which is the result of BUG_ON(1) not always leading to the compiler treating the code path as unreachable: include/linux/ceph/osdmap.h: In function 'ceph_can_shift_osds': include/linux/ceph/osdmap.h:62:1: error: control reaches end of non-void function [-Werror=return-type] Using BUG() here avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 9a9041784dcf..412906609954 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) case CEPH_POOL_TYPE_EC: return false; default: - BUG_ON(1); + BUG(); } } -- cgit v1.2.3 From 66a0e2d579dbec5c676cfe446234ffebb267c564 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: crush: remove mutable part of CRUSH map Then add it to the working state. It would be very nice if we didn't have to take a lock to calculate a crush placement. By moving the permutation array into the working data, we can treat the CRUSH map as immutable. Reflects ceph.git commit cbcd039651c0569551cb90d26ce27e1432671f2a. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 1 + include/linux/crush/crush.h | 41 ++++++++++++++++++++++++++++++++++------- include/linux/crush/mapper.h | 4 +++- 3 files changed, 38 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 412906609954..cef1cab789b9 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -175,6 +175,7 @@ struct ceph_osdmap { struct mutex crush_scratch_mutex; int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + void *crush_workspace; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index be8f12b8f195..fbecbd089d75 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -135,13 +135,6 @@ struct crush_bucket { __u32 size; /* num items */ __s32 *items; - /* - * cached random permutation: used for uniform bucket and for - * the linear search fallback for the other bucket types. - */ - __u32 perm_x; /* @x for which *perm is defined */ - __u32 perm_n; /* num elements of *perm that are permuted/defined */ - __u32 *perm; }; struct crush_bucket_uniform { @@ -211,6 +204,21 @@ struct crush_map { * device fails. */ __u8 chooseleaf_stable; + /* + * This value is calculated after decode or construction by + * the builder. It is exposed here (rather than having a + * 'build CRUSH working space' function) so that callers can + * reserve a static buffer, allocate space on the stack, or + * otherwise avoid calling into the heap allocator if they + * want to. The size of the working space depends on the map, + * while the size of the scratch vector passed to the mapper + * depends on the size of the desired result set. + * + * Nothing stops the caller from allocating both in one swell + * foop and passing in two points, though. + */ + size_t working_size; + #ifndef __KERNEL__ /* * version 0 (original) of straw_calc has various flaws. version 1 @@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i) return ((i+1) << 1)-1; } +/* + * These data structures are private to the CRUSH implementation. They + * are exposed in this header file because builder needs their + * definitions to calculate the total working size. + * + * Moving this out of the crush map allow us to treat the CRUSH map as + * immutable within the mapper and removes the requirement for a CRUSH + * map lock. + */ +struct crush_work_bucket { + __u32 perm_x; /* @x for which *perm is defined */ + __u32 perm_n; /* num elements of *perm that are permuted/defined */ + __u32 *perm; /* Permutation of the bucket's items */ +}; + +struct crush_work { + struct crush_work_bucket **work; /* Per-bucket working store */ +}; + #endif diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 5dfd5b1125d2..3303c7fd8a31 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,6 +15,8 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - int *scratch); + void *cwin, int *scratch); + +void crush_init_workspace(const struct crush_map *map, void *v); #endif -- cgit v1.2.3 From 743efcffffc6620ab44ea9ec67c7e4e28dfa7742 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: crush: merge working data and scratch Much like Arlo Guthrie, I decided that one big pile is better than two little piles. Reflects ceph.git commit 95c2df6c7e0b22d2ea9d91db500cf8b9441c73ba. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 3 +-- include/linux/crush/mapper.h | 14 +++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index cef1cab789b9..8cebdc4158c3 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -173,8 +173,7 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_scratch_mutex; - int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + struct mutex crush_workspace_mutex; void *crush_workspace; }; diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 3303c7fd8a31..c95e19e1ff11 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,7 +15,19 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - void *cwin, int *scratch); + void *cwin); + +/* + * Returns the exact amount of workspace that will need to be used + * for a given combination of crush_map and result_max. The caller can + * then allocate this much on its own, either on the stack, in a + * per-thread long-lived buffer, or however it likes. + */ +static inline size_t crush_work_size(const struct crush_map *map, + int result_max) +{ + return map->working_size + result_max * 3 * sizeof(__u32); +} void crush_init_workspace(const struct crush_map *map, void *v); -- cgit v1.2.3 From 083a51fbc57ca848ab087692f3cc97898fd88b54 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Feb 2017 16:14:52 +0100 Subject: libceph: bump CEPH_PG_MAX_SIZE to 32 ... to accommodate potentially very wide EC pools. This increases the size of a typical rbd ceph_osd_request by ~12% (from 1040 to 1168 bytes), but I'd rather go future proof here. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/rados.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 5c0da61cb763..5d0018782d50 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -50,7 +50,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 -#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ +#define CEPH_PG_MAX_SIZE 32 /* max # osds in a single pg */ /* * placement group. -- cgit v1.2.3 From 6c696d8560e74cd42458931375875d62ae88c6ae Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:23 +0100 Subject: rbd: kill obj_request->object_name and rbd_segment_name_cache Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- include/linux/ceph/osdmap.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8cebdc4158c3..938656f70807 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest, const struct ceph_object_locator *src); void ceph_oloc_destroy(struct ceph_object_locator *oloc); -/* - * Maximum supported by kernel client object name length - * - * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100) - */ -#define CEPH_MAX_OID_NAME_LEN 100 - /* * 51-char inline_name is long enough for all cephfs and all but one * rbd requests: in ".rbd"/"rbd_id." can be -- cgit v1.2.3 From b18b9550e4059ceea0393c518eb323b95243f92f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 11 Feb 2017 18:46:08 +0100 Subject: libceph: get rid of ack vs commit - CEPH_OSD_FLAG_ACK shouldn't be set anymore, so assert on it - remove support for handling ack replies (OSDs will send ack replies only if clients request them) - drop the "do lingering callbacks under osd->lock" logic from handle_reply() -- lreq->lock is sufficient in all three cases Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: Sage Weil --- include/linux/ceph/osd_client.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03a6653d329a..2ea0c282f3dc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -22,7 +22,6 @@ struct ceph_osd_client; * completion callback for async writepages */ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); -typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); #define CEPH_HOMELESS_OSD -1 @@ -170,15 +169,12 @@ struct ceph_osd_request { unsigned int r_num_ops; int r_result; - bool r_got_reply; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; - struct completion r_completion; - struct completion r_done_completion; /* fsync waiter */ + struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; - ceph_osdc_unsafe_callback_t r_unsafe_callback; struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ -- cgit v1.2.3