/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define NUM_CIDS        (UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

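/*
 * Per-queue-pair state.  'lock' protects 'qp', 'sqhd', and
 * 'datamove_queue'; the CID bitset is manipulated with atomic bitset
 * operations and needs no lock.
 */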
struct nvmft_qpair {
        struct nvmft_controller *ctrlr;
        struct nvmf_qpair *qp;
        struct cidset *cids;

        bool    admin;
        bool    sq_flow_control;
        uint16_t qid;
        u_int   qsize;
        uint16_t sqhd;
        volatile u_int qp_refs;         /* Internal references on 'qp'. */

        struct task datamove_task;
        STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

        struct mtx lock;

        char    name[16];
};

static int      _nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void     nvmft_datamove_task(void *context, int pending);

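/*
 * Transport error callback.  Invoked by the transport layer when a
 * queue pair fails; forwards the error to the controller so the
 * association can be torn down.
 */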
static void
nvmft_qpair_error(void *arg, int error)
{
        struct nvmft_qpair *qp = arg;
        struct nvmft_controller *ctrlr = qp->ctrlr;

        /*
         * XXX: The Linux TCP initiator sends a RST immediately after
         * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
         * errors on shutdown.
         */
        if (error == ECONNRESET)
                error = 0;

        if (error != 0)
                nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
        nvmft_controller_error(ctrlr, qp, error);
}

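/*
 * Capsule receive callback.  Validates the incoming command capsule,
 * rejects duplicate command IDs, and dispatches the command to the
 * admin or I/O handler.
 */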
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
        struct nvmft_qpair *qp = arg;
        struct nvmft_controller *ctrlr = qp->ctrlr;
        const struct nvme_command *cmd;
        uint8_t sc_status;

        cmd = nvmf_capsule_sqe(nc);
        if (ctrlr == NULL) {
                printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
                    qp->name, le16toh(cmd->cid), cmd->opc);
                nvmf_free_capsule(nc);
                return;
        }

        sc_status = nvmf_validate_command_capsule(nc);
        if (sc_status != NVME_SC_SUCCESS) {
                _nvmft_send_generic_error(qp, nc, sc_status);
                nvmf_free_capsule(nc);
                return;
        }

        /* Don't bother byte-swapping CID. */
        if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
                _nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
                nvmf_free_capsule(nc);
                return;
        }

        if (qp->admin)
                nvmft_handle_admin_command(ctrlr, nc);
        else
                nvmft_handle_io_command(qp, qp->qid, nc);
}

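/*
 * Allocate and initialize a queue pair from the connection
 * parameters in 'params'.  Returns NULL if the transport-level queue
 * pair cannot be created.
 */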
struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
    const char *name)
{
        struct nvmft_qpair *qp;

        qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
        qp->admin = nvlist_get_bool(params, "admin");
        qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
        qp->qsize = nvlist_get_number(params, "qsize");
        qp->qid = qid;
        qp->sqhd = nvlist_get_number(params, "sqhd");
        strlcpy(qp->name, name, sizeof(qp->name));
        mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
        qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
        STAILQ_INIT(&qp->datamove_queue);
        TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

        qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
            qp, nvmft_receive_capsule, qp);
        if (qp->qp == NULL) {
                mtx_destroy(&qp->lock);
                free(qp->cids, M_NVMFT);
                free(qp, M_NVMFT);
                return (NULL);
        }

        refcount_init(&qp->qp_refs, 1);
        return (qp);
}

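/*
 * Disconnect the queue pair from the transport and abort any queued
 * datamove requests.  Safe to call more than once; the transport
 * queue pair is freed once its last internal reference is released.
 */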
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
        STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
        struct nvmf_qpair *nq;
        union ctl_io *io;

        STAILQ_INIT(&datamove_queue);
        mtx_lock(&qp->lock);
        nq = qp->qp;
        qp->qp = NULL;
        STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
        mtx_unlock(&qp->lock);
        if (nq != NULL && refcount_release(&qp->qp_refs))
                nvmf_free_qpair(nq);

        while (!STAILQ_EMPTY(&datamove_queue)) {
                io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
                STAILQ_REMOVE_HEAD(&datamove_queue, links);
                nvmft_abort_datamove(io);
        }
        nvmft_drain_task(&qp->datamove_task);
}

void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
        nvmft_qpair_shutdown(qp);
        mtx_destroy(&qp->lock);
        free(qp->cids, M_NVMFT);
        free(qp, M_NVMFT);
}

struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
        return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
        return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
        return (qp->name);
}

uint32_t
nvmft_max_ioccsz(struct nvmft_qpair *qp)
{
        return (nvmf_max_ioccsz(qp->qp));
}

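/*
 * Common completion transmit path.  Takes a reference on the
 * transport queue pair so the capsule can be sent after the qpair
 * lock is dropped, and updates SQHD when SQ flow control is enabled.
 */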
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
        struct nvme_completion cpl;
        struct nvmf_qpair *nq;
        struct nvmf_capsule *rc;
        int error;

        memcpy(&cpl, cqe, sizeof(cpl));
        mtx_lock(&qp->lock);
        nq = qp->qp;
        if (nq == NULL) {
                mtx_unlock(&qp->lock);
                return (ENOTCONN);
        }
        refcount_acquire(&qp->qp_refs);

        /* Set SQHD. */
        if (qp->sq_flow_control) {
                qp->sqhd = (qp->sqhd + 1) % qp->qsize;
                cpl.sqhd = htole16(qp->sqhd);
        } else
                cpl.sqhd = 0;
        mtx_unlock(&qp->lock);

        rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
        error = nvmf_transmit_capsule(rc);
        nvmf_free_capsule(rc);

        if (refcount_release(&qp->qp_refs))
                nvmf_free_qpair(nq);
        return (error);
}

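/*
 * Mark a command's CID as no longer in flight without transmitting a
 * completion through this layer.
 */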
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
        const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

        /* Don't bother byte-swapping CID. */
        KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
            ("%s: CID %u not busy", __func__, cmd->cid));

        BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
        const struct nvme_completion *cpl = cqe;

        /* Don't bother byte-swapping CID. */
        KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
            ("%s: CID %u not busy", __func__, cpl->cid));

        BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
        return (_nvmft_send_response(qp, cqe));
}

void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
        struct nvme_completion *cpl = cqe;
        const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

        memset(cpl, 0, sizeof(*cpl));
        cpl->cid = cmd->cid;
        cpl->status = htole16(status);
}

int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
        struct nvme_completion cpl;
        uint16_t status;

        status = NVMEF(NVME_STATUS_SCT, sc_type) |
            NVMEF(NVME_STATUS_SC, sc_status);
        nvmft_init_cqe(&cpl, nc, status);
        return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
        return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
        struct nvme_completion cpl;
        uint16_t status;

        status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
            NVMEF(NVME_STATUS_SC, sc_status);
        nvmft_init_cqe(&cpl, nc, status);
        return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
        return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

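/*
 * Fabrics CONNECT commands are completed with a fabrics-specific
 * response format rather than a plain NVMe completion, so they have
 * their own initialization and transmit helpers below.
 */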
static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
        memset(rsp, 0, sizeof(*rsp));
        rsp->cid = cmd->cid;
        rsp->status = htole16(status);
}

static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
        struct nvmf_capsule *rc;
        struct nvmf_qpair *nq;
        int error;

        mtx_lock(&qp->lock);
        nq = qp->qp;
        if (nq == NULL) {
                mtx_unlock(&qp->lock);
                return (ENOTCONN);
        }
        refcount_acquire(&qp->qp_refs);
        mtx_unlock(&qp->lock);

        /*
         * Use the snapshot in 'nq'; 'qp->qp' may be cleared by
         * nvmft_qpair_shutdown() once the lock is dropped.
         */
        rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
        error = nvmf_transmit_capsule(rc);
        nvmf_free_capsule(rc);

        if (refcount_release(&qp->qp_refs))
                nvmf_free_qpair(nq);
        return (error);
}

void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
        struct nvmf_fabric_connect_rsp rsp;
        uint16_t status;

        status = NVMEF(NVME_STATUS_SCT, sc_type) |
            NVMEF(NVME_STATUS_SC, sc_status);
        nvmft_init_connect_rsp(&rsp, cmd, status);
        nvmft_send_connect_response(qp, &rsp);
}

void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
        struct nvmf_fabric_connect_rsp rsp;

        nvmft_init_connect_rsp(&rsp, cmd,
            NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
            NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
        rsp.status_code_specific.invalid.ipo = htole16(offset);
        rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
        nvmft_send_connect_response(qp, &rsp);
}

int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
        struct nvmf_fabric_connect_rsp rsp;

        qp->ctrlr = ctrlr;
        nvmft_init_connect_rsp(&rsp, cmd, 0);
        if (qp->sq_flow_control)
                rsp.sqhd = htole16(qp->sqhd);
        else
                rsp.sqhd = htole16(0xffff);
        rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
        return (nvmft_send_connect_response(qp, &rsp));
}

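/*
 * Queue a CTL datamove request for this queue pair.  Requests are
 * processed in order by the datamove task; the task is only enqueued
 * when the queue transitions from empty to non-empty.
 */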
void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
        bool enqueue_task;

        mtx_lock(&qp->lock);
        if (qp->qp == NULL) {
                mtx_unlock(&qp->lock);
                nvmft_abort_datamove(io);
                return;
        }
        enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
        STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
        mtx_unlock(&qp->lock);
        if (enqueue_task)
                nvmft_enqueue_task(&qp->datamove_task);
}

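/*
 * Task handler that drains the datamove queue.  Requests found after
 * the queue pair has been shut down are aborted rather than handled.
 */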
static void
nvmft_datamove_task(void *context, int pending __unused)
{
        struct nvmft_qpair *qp = context;
        union ctl_io *io;
        bool abort;

        mtx_lock(&qp->lock);
        while (!STAILQ_EMPTY(&qp->datamove_queue)) {
                io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
                STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
                abort = (qp->qp == NULL);
                mtx_unlock(&qp->lock);
                if (abort)
                        nvmft_abort_datamove(io);
                else
                        nvmft_handle_datamove(io);
                mtx_lock(&qp->lock);
        }
        mtx_unlock(&qp->lock);
}